sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5import itertools 6from collections import defaultdict 7 8from sqlglot import exp 9from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 10from sqlglot.helper import apply_index_offset, ensure_list, seq_get 11from sqlglot.time import format_time 12from sqlglot.tokens import Token, Tokenizer, TokenType 13from sqlglot.trie import TrieResult, in_trie, new_trie 14 15if t.TYPE_CHECKING: 16 from sqlglot._typing import E, Lit 17 from sqlglot.dialects.dialect import Dialect, DialectType 18 19 T = t.TypeVar("T") 20 TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor) 21 22logger = logging.getLogger("sqlglot") 23 24OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]] 25 26 27def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 28 if len(args) == 1 and args[0].is_star: 29 return exp.StarMap(this=args[0]) 30 31 keys = [] 32 values = [] 33 for i in range(0, len(args), 2): 34 keys.append(args[i]) 35 values.append(args[i + 1]) 36 37 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False)) 38 39 40def build_like(args: t.List) -> exp.Escape | exp.Like: 41 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 42 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 43 44 45def binary_range_parser( 46 expr_type: t.Type[exp.Expression], reverse_args: bool = False 47) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 48 def _parse_binary_range( 49 self: Parser, this: t.Optional[exp.Expression] 50 ) -> t.Optional[exp.Expression]: 51 expression = self._parse_bitwise() 52 if reverse_args: 53 this, expression = expression, this 54 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 55 56 return _parse_binary_range 57 58 59def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 60 # Default argument order is base, expression 61 this = seq_get(args, 0) 62 expression = seq_get(args, 1) 63 64 if expression: 65 if not dialect.LOG_BASE_FIRST: 66 this, expression = expression, this 67 return exp.Log(this=this, expression=expression) 68 69 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) 70 71 72def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex: 73 arg = seq_get(args, 0) 74 return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg) 75 76 77def build_lower(args: t.List) -> exp.Lower | exp.Hex: 78 # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation 79 arg = seq_get(args, 0) 80 return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg) 81 82 83def build_upper(args: t.List) -> exp.Upper | exp.Hex: 84 # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation 85 arg = seq_get(args, 0) 86 return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg) 87 88 89def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 90 def _builder(args: t.List, dialect: Dialect) -> E: 91 expression = expr_type( 92 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 93 ) 94 if len(args) > 2 and expr_type is exp.JSONExtract: 95 expression.set("expressions", args[2:]) 96 97 return expression 98 99 return _builder 100 101 102def build_mod(args: t.List) -> exp.Mod: 103 this = seq_get(args, 0) 104 expression = seq_get(args, 1) 105 106 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 107 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 108 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 109 110 return exp.Mod(this=this, expression=expression) 111 112 113def build_pad(args: t.List, is_left: bool = True): 114 return exp.Pad( 115 this=seq_get(args, 0), 116 expression=seq_get(args, 1), 117 fill_pattern=seq_get(args, 2), 118 is_left=is_left, 119 ) 120 121 122def build_array_constructor( 123 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 124) -> exp.Expression: 125 array_exp = exp_class(expressions=args) 126 127 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 128 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 129 130 return array_exp 131 132 133def build_convert_timezone( 134 args: t.List, default_source_tz: t.Optional[str] = None 135) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 136 if len(args) == 2: 137 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 138 return exp.ConvertTimezone( 139 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 140 ) 141 142 return exp.ConvertTimezone.from_arg_list(args) 143 144 145def build_trim(args: t.List, is_left: bool = True): 146 return exp.Trim( 147 this=seq_get(args, 0), 148 expression=seq_get(args, 1), 149 position="LEADING" if is_left else "TRAILING", 150 ) 151 152 153def build_coalesce( 154 args: t.List, is_nvl: t.Optional[bool] = None, is_null: t.Optional[bool] = None 155) -> exp.Coalesce: 156 return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl, is_null=is_null) 157 158 159def build_locate_strposition(args: t.List): 160 return exp.StrPosition( 161 this=seq_get(args, 1), 162 substr=seq_get(args, 0), 163 position=seq_get(args, 2), 164 ) 165 166 167class _Parser(type): 168 def __new__(cls, clsname, bases, attrs): 169 klass = super().__new__(cls, clsname, bases, attrs) 170 171 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 172 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 173 174 return klass 175 176 177class Parser(metaclass=_Parser): 178 """ 179 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 180 181 Args: 182 error_level: The desired error level. 183 Default: ErrorLevel.IMMEDIATE 184 error_message_context: The amount of context to capture from a query string when displaying 185 the error message (in number of characters). 186 Default: 100 187 max_errors: Maximum number of error messages to include in a raised ParseError. 188 This is only relevant if error_level is ErrorLevel.RAISE. 189 Default: 3 190 """ 191 192 FUNCTIONS: t.Dict[str, t.Callable] = { 193 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 194 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 195 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 196 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 197 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 198 ), 199 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 200 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 201 ), 202 "CHAR": lambda args: exp.Chr(expressions=args), 203 "CHR": lambda args: exp.Chr(expressions=args), 204 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 205 "CONCAT": lambda args, dialect: exp.Concat( 206 expressions=args, 207 safe=not dialect.STRICT_STRING_CONCAT, 208 coalesce=dialect.CONCAT_COALESCE, 209 ), 210 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 211 expressions=args, 212 safe=not dialect.STRICT_STRING_CONCAT, 213 coalesce=dialect.CONCAT_COALESCE, 214 ), 215 "CONVERT_TIMEZONE": build_convert_timezone, 216 "DATE_TO_DATE_STR": lambda args: exp.Cast( 217 this=seq_get(args, 0), 218 to=exp.DataType(this=exp.DataType.Type.TEXT), 219 ), 220 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 221 start=seq_get(args, 0), 222 end=seq_get(args, 1), 223 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 224 ), 225 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 226 "HEX": build_hex, 227 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 228 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 229 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 230 "LIKE": build_like, 231 "LOG": build_logarithm, 232 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 233 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 234 "LOWER": build_lower, 235 "LPAD": lambda args: build_pad(args), 236 "LEFTPAD": lambda args: build_pad(args), 237 "LTRIM": lambda args: build_trim(args), 238 "MOD": build_mod, 239 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 240 "RPAD": lambda args: build_pad(args, is_left=False), 241 "RTRIM": lambda args: build_trim(args, is_left=False), 242 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 243 if len(args) != 2 244 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 245 "STRPOS": exp.StrPosition.from_arg_list, 246 "CHARINDEX": lambda args: build_locate_strposition(args), 247 "INSTR": exp.StrPosition.from_arg_list, 248 "LOCATE": lambda args: build_locate_strposition(args), 249 "TIME_TO_TIME_STR": lambda args: exp.Cast( 250 this=seq_get(args, 0), 251 to=exp.DataType(this=exp.DataType.Type.TEXT), 252 ), 253 "TO_HEX": build_hex, 254 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 255 this=exp.Cast( 256 this=seq_get(args, 0), 257 to=exp.DataType(this=exp.DataType.Type.TEXT), 258 ), 259 start=exp.Literal.number(1), 260 length=exp.Literal.number(10), 261 ), 262 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 263 "UPPER": build_upper, 264 "VAR_MAP": build_var_map, 265 } 266 267 NO_PAREN_FUNCTIONS = { 268 TokenType.CURRENT_DATE: exp.CurrentDate, 269 TokenType.CURRENT_DATETIME: exp.CurrentDate, 270 TokenType.CURRENT_TIME: exp.CurrentTime, 271 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 272 TokenType.CURRENT_USER: exp.CurrentUser, 273 } 274 275 STRUCT_TYPE_TOKENS = { 276 TokenType.NESTED, 277 TokenType.OBJECT, 278 TokenType.STRUCT, 279 TokenType.UNION, 280 } 281 282 NESTED_TYPE_TOKENS = { 283 TokenType.ARRAY, 284 TokenType.LIST, 285 TokenType.LOWCARDINALITY, 286 TokenType.MAP, 287 TokenType.NULLABLE, 288 TokenType.RANGE, 289 *STRUCT_TYPE_TOKENS, 290 } 291 292 ENUM_TYPE_TOKENS = { 293 TokenType.DYNAMIC, 294 TokenType.ENUM, 295 TokenType.ENUM8, 296 TokenType.ENUM16, 297 } 298 299 AGGREGATE_TYPE_TOKENS = { 300 TokenType.AGGREGATEFUNCTION, 301 TokenType.SIMPLEAGGREGATEFUNCTION, 302 } 303 304 TYPE_TOKENS = { 305 TokenType.BIT, 306 TokenType.BOOLEAN, 307 TokenType.TINYINT, 308 TokenType.UTINYINT, 309 TokenType.SMALLINT, 310 TokenType.USMALLINT, 311 TokenType.INT, 312 TokenType.UINT, 313 TokenType.BIGINT, 314 TokenType.UBIGINT, 315 TokenType.INT128, 316 TokenType.UINT128, 317 TokenType.INT256, 318 TokenType.UINT256, 319 TokenType.MEDIUMINT, 320 TokenType.UMEDIUMINT, 321 TokenType.FIXEDSTRING, 322 TokenType.FLOAT, 323 TokenType.DOUBLE, 324 TokenType.UDOUBLE, 325 TokenType.CHAR, 326 TokenType.NCHAR, 327 TokenType.VARCHAR, 328 TokenType.NVARCHAR, 329 TokenType.BPCHAR, 330 TokenType.TEXT, 331 TokenType.MEDIUMTEXT, 332 TokenType.LONGTEXT, 333 TokenType.BLOB, 334 TokenType.MEDIUMBLOB, 335 TokenType.LONGBLOB, 336 TokenType.BINARY, 337 TokenType.VARBINARY, 338 TokenType.JSON, 339 TokenType.JSONB, 340 TokenType.INTERVAL, 341 TokenType.TINYBLOB, 342 TokenType.TINYTEXT, 343 TokenType.TIME, 344 TokenType.TIMETZ, 345 TokenType.TIMESTAMP, 346 TokenType.TIMESTAMP_S, 347 TokenType.TIMESTAMP_MS, 348 TokenType.TIMESTAMP_NS, 349 TokenType.TIMESTAMPTZ, 350 TokenType.TIMESTAMPLTZ, 351 TokenType.TIMESTAMPNTZ, 352 TokenType.DATETIME, 353 TokenType.DATETIME2, 354 TokenType.DATETIME64, 355 TokenType.SMALLDATETIME, 356 TokenType.DATE, 357 TokenType.DATE32, 358 TokenType.INT4RANGE, 359 TokenType.INT4MULTIRANGE, 360 TokenType.INT8RANGE, 361 TokenType.INT8MULTIRANGE, 362 TokenType.NUMRANGE, 363 TokenType.NUMMULTIRANGE, 364 TokenType.TSRANGE, 365 TokenType.TSMULTIRANGE, 366 TokenType.TSTZRANGE, 367 TokenType.TSTZMULTIRANGE, 368 TokenType.DATERANGE, 369 TokenType.DATEMULTIRANGE, 370 TokenType.DECIMAL, 371 TokenType.DECIMAL32, 372 TokenType.DECIMAL64, 373 TokenType.DECIMAL128, 374 TokenType.DECIMAL256, 375 TokenType.UDECIMAL, 376 TokenType.BIGDECIMAL, 377 TokenType.UUID, 378 TokenType.GEOGRAPHY, 379 TokenType.GEOMETRY, 380 TokenType.POINT, 381 TokenType.RING, 382 TokenType.LINESTRING, 383 TokenType.MULTILINESTRING, 384 TokenType.POLYGON, 385 TokenType.MULTIPOLYGON, 386 TokenType.HLLSKETCH, 387 TokenType.HSTORE, 388 TokenType.PSEUDO_TYPE, 389 TokenType.SUPER, 390 TokenType.SERIAL, 391 TokenType.SMALLSERIAL, 392 TokenType.BIGSERIAL, 393 TokenType.XML, 394 TokenType.YEAR, 395 TokenType.USERDEFINED, 396 TokenType.MONEY, 397 TokenType.SMALLMONEY, 398 TokenType.ROWVERSION, 399 TokenType.IMAGE, 400 TokenType.VARIANT, 401 TokenType.VECTOR, 402 TokenType.VOID, 403 TokenType.OBJECT, 404 TokenType.OBJECT_IDENTIFIER, 405 TokenType.INET, 406 TokenType.IPADDRESS, 407 TokenType.IPPREFIX, 408 TokenType.IPV4, 409 TokenType.IPV6, 410 TokenType.UNKNOWN, 411 TokenType.NOTHING, 412 TokenType.NULL, 413 TokenType.NAME, 414 TokenType.TDIGEST, 415 TokenType.DYNAMIC, 416 *ENUM_TYPE_TOKENS, 417 *NESTED_TYPE_TOKENS, 418 *AGGREGATE_TYPE_TOKENS, 419 } 420 421 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 422 TokenType.BIGINT: TokenType.UBIGINT, 423 TokenType.INT: TokenType.UINT, 424 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 425 TokenType.SMALLINT: TokenType.USMALLINT, 426 TokenType.TINYINT: TokenType.UTINYINT, 427 TokenType.DECIMAL: TokenType.UDECIMAL, 428 TokenType.DOUBLE: TokenType.UDOUBLE, 429 } 430 431 SUBQUERY_PREDICATES = { 432 TokenType.ANY: exp.Any, 433 TokenType.ALL: exp.All, 434 TokenType.EXISTS: exp.Exists, 435 TokenType.SOME: exp.Any, 436 } 437 438 RESERVED_TOKENS = { 439 *Tokenizer.SINGLE_TOKENS.values(), 440 TokenType.SELECT, 441 } - {TokenType.IDENTIFIER} 442 443 DB_CREATABLES = { 444 TokenType.DATABASE, 445 TokenType.DICTIONARY, 446 TokenType.FILE_FORMAT, 447 TokenType.MODEL, 448 TokenType.NAMESPACE, 449 TokenType.SCHEMA, 450 TokenType.SEQUENCE, 451 TokenType.SINK, 452 TokenType.SOURCE, 453 TokenType.STAGE, 454 TokenType.STORAGE_INTEGRATION, 455 TokenType.STREAMLIT, 456 TokenType.TABLE, 457 TokenType.TAG, 458 TokenType.VIEW, 459 TokenType.WAREHOUSE, 460 } 461 462 CREATABLES = { 463 TokenType.COLUMN, 464 TokenType.CONSTRAINT, 465 TokenType.FOREIGN_KEY, 466 TokenType.FUNCTION, 467 TokenType.INDEX, 468 TokenType.PROCEDURE, 469 *DB_CREATABLES, 470 } 471 472 ALTERABLES = { 473 TokenType.INDEX, 474 TokenType.TABLE, 475 TokenType.VIEW, 476 } 477 478 # Tokens that can represent identifiers 479 ID_VAR_TOKENS = { 480 TokenType.ALL, 481 TokenType.ATTACH, 482 TokenType.VAR, 483 TokenType.ANTI, 484 TokenType.APPLY, 485 TokenType.ASC, 486 TokenType.ASOF, 487 TokenType.AUTO_INCREMENT, 488 TokenType.BEGIN, 489 TokenType.BPCHAR, 490 TokenType.CACHE, 491 TokenType.CASE, 492 TokenType.COLLATE, 493 TokenType.COMMAND, 494 TokenType.COMMENT, 495 TokenType.COMMIT, 496 TokenType.CONSTRAINT, 497 TokenType.COPY, 498 TokenType.CUBE, 499 TokenType.CURRENT_SCHEMA, 500 TokenType.DEFAULT, 501 TokenType.DELETE, 502 TokenType.DESC, 503 TokenType.DESCRIBE, 504 TokenType.DETACH, 505 TokenType.DICTIONARY, 506 TokenType.DIV, 507 TokenType.END, 508 TokenType.EXECUTE, 509 TokenType.EXPORT, 510 TokenType.ESCAPE, 511 TokenType.FALSE, 512 TokenType.FIRST, 513 TokenType.FILTER, 514 TokenType.FINAL, 515 TokenType.FORMAT, 516 TokenType.FULL, 517 TokenType.GET, 518 TokenType.IDENTIFIER, 519 TokenType.IS, 520 TokenType.ISNULL, 521 TokenType.INTERVAL, 522 TokenType.KEEP, 523 TokenType.KILL, 524 TokenType.LEFT, 525 TokenType.LIMIT, 526 TokenType.LOAD, 527 TokenType.MERGE, 528 TokenType.NATURAL, 529 TokenType.NEXT, 530 TokenType.OFFSET, 531 TokenType.OPERATOR, 532 TokenType.ORDINALITY, 533 TokenType.OVERLAPS, 534 TokenType.OVERWRITE, 535 TokenType.PARTITION, 536 TokenType.PERCENT, 537 TokenType.PIVOT, 538 TokenType.PRAGMA, 539 TokenType.PUT, 540 TokenType.RANGE, 541 TokenType.RECURSIVE, 542 TokenType.REFERENCES, 543 TokenType.REFRESH, 544 TokenType.RENAME, 545 TokenType.REPLACE, 546 TokenType.RIGHT, 547 TokenType.ROLLUP, 548 TokenType.ROW, 549 TokenType.ROWS, 550 TokenType.SEMI, 551 TokenType.SET, 552 TokenType.SETTINGS, 553 TokenType.SHOW, 554 TokenType.TEMPORARY, 555 TokenType.TOP, 556 TokenType.TRUE, 557 TokenType.TRUNCATE, 558 TokenType.UNIQUE, 559 TokenType.UNNEST, 560 TokenType.UNPIVOT, 561 TokenType.UPDATE, 562 TokenType.USE, 563 TokenType.VOLATILE, 564 TokenType.WINDOW, 565 *CREATABLES, 566 *SUBQUERY_PREDICATES, 567 *TYPE_TOKENS, 568 *NO_PAREN_FUNCTIONS, 569 } 570 ID_VAR_TOKENS.remove(TokenType.UNION) 571 572 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 573 TokenType.ANTI, 574 TokenType.APPLY, 575 TokenType.ASOF, 576 TokenType.FULL, 577 TokenType.LEFT, 578 TokenType.LOCK, 579 TokenType.NATURAL, 580 TokenType.RIGHT, 581 TokenType.SEMI, 582 TokenType.WINDOW, 583 } 584 585 ALIAS_TOKENS = ID_VAR_TOKENS 586 587 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 588 589 ARRAY_CONSTRUCTORS = { 590 "ARRAY": exp.Array, 591 "LIST": exp.List, 592 } 593 594 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 595 596 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 597 598 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 599 600 FUNC_TOKENS = { 601 TokenType.COLLATE, 602 TokenType.COMMAND, 603 TokenType.CURRENT_DATE, 604 TokenType.CURRENT_DATETIME, 605 TokenType.CURRENT_SCHEMA, 606 TokenType.CURRENT_TIMESTAMP, 607 TokenType.CURRENT_TIME, 608 TokenType.CURRENT_USER, 609 TokenType.FILTER, 610 TokenType.FIRST, 611 TokenType.FORMAT, 612 TokenType.GET, 613 TokenType.GLOB, 614 TokenType.IDENTIFIER, 615 TokenType.INDEX, 616 TokenType.ISNULL, 617 TokenType.ILIKE, 618 TokenType.INSERT, 619 TokenType.LIKE, 620 TokenType.MERGE, 621 TokenType.NEXT, 622 TokenType.OFFSET, 623 TokenType.PRIMARY_KEY, 624 TokenType.RANGE, 625 TokenType.REPLACE, 626 TokenType.RLIKE, 627 TokenType.ROW, 628 TokenType.UNNEST, 629 TokenType.VAR, 630 TokenType.LEFT, 631 TokenType.RIGHT, 632 TokenType.SEQUENCE, 633 TokenType.DATE, 634 TokenType.DATETIME, 635 TokenType.TABLE, 636 TokenType.TIMESTAMP, 637 TokenType.TIMESTAMPTZ, 638 TokenType.TRUNCATE, 639 TokenType.WINDOW, 640 TokenType.XOR, 641 *TYPE_TOKENS, 642 *SUBQUERY_PREDICATES, 643 } 644 645 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 646 TokenType.AND: exp.And, 647 } 648 649 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 650 TokenType.COLON_EQ: exp.PropertyEQ, 651 } 652 653 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 654 TokenType.OR: exp.Or, 655 } 656 657 EQUALITY = { 658 TokenType.EQ: exp.EQ, 659 TokenType.NEQ: exp.NEQ, 660 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 661 } 662 663 COMPARISON = { 664 TokenType.GT: exp.GT, 665 TokenType.GTE: exp.GTE, 666 TokenType.LT: exp.LT, 667 TokenType.LTE: exp.LTE, 668 } 669 670 BITWISE = { 671 TokenType.AMP: exp.BitwiseAnd, 672 TokenType.CARET: exp.BitwiseXor, 673 TokenType.PIPE: exp.BitwiseOr, 674 } 675 676 TERM = { 677 TokenType.DASH: exp.Sub, 678 TokenType.PLUS: exp.Add, 679 TokenType.MOD: exp.Mod, 680 TokenType.COLLATE: exp.Collate, 681 } 682 683 FACTOR = { 684 TokenType.DIV: exp.IntDiv, 685 TokenType.LR_ARROW: exp.Distance, 686 TokenType.SLASH: exp.Div, 687 TokenType.STAR: exp.Mul, 688 } 689 690 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 691 692 TIMES = { 693 TokenType.TIME, 694 TokenType.TIMETZ, 695 } 696 697 TIMESTAMPS = { 698 TokenType.TIMESTAMP, 699 TokenType.TIMESTAMPNTZ, 700 TokenType.TIMESTAMPTZ, 701 TokenType.TIMESTAMPLTZ, 702 *TIMES, 703 } 704 705 SET_OPERATIONS = { 706 TokenType.UNION, 707 TokenType.INTERSECT, 708 TokenType.EXCEPT, 709 } 710 711 JOIN_METHODS = { 712 TokenType.ASOF, 713 TokenType.NATURAL, 714 TokenType.POSITIONAL, 715 } 716 717 JOIN_SIDES = { 718 TokenType.LEFT, 719 TokenType.RIGHT, 720 TokenType.FULL, 721 } 722 723 JOIN_KINDS = { 724 TokenType.ANTI, 725 TokenType.CROSS, 726 TokenType.INNER, 727 TokenType.OUTER, 728 TokenType.SEMI, 729 TokenType.STRAIGHT_JOIN, 730 } 731 732 JOIN_HINTS: t.Set[str] = set() 733 734 LAMBDAS = { 735 TokenType.ARROW: lambda self, expressions: self.expression( 736 exp.Lambda, 737 this=self._replace_lambda( 738 self._parse_assignment(), 739 expressions, 740 ), 741 expressions=expressions, 742 ), 743 TokenType.FARROW: lambda self, expressions: self.expression( 744 exp.Kwarg, 745 this=exp.var(expressions[0].name), 746 expression=self._parse_assignment(), 747 ), 748 } 749 750 COLUMN_OPERATORS = { 751 TokenType.DOT: None, 752 TokenType.DOTCOLON: lambda self, this, to: self.expression( 753 exp.JSONCast, 754 this=this, 755 to=to, 756 ), 757 TokenType.DCOLON: lambda self, this, to: self.expression( 758 exp.Cast if self.STRICT_CAST else exp.TryCast, 759 this=this, 760 to=to, 761 ), 762 TokenType.ARROW: lambda self, this, path: self.expression( 763 exp.JSONExtract, 764 this=this, 765 expression=self.dialect.to_json_path(path), 766 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 767 ), 768 TokenType.DARROW: lambda self, this, path: self.expression( 769 exp.JSONExtractScalar, 770 this=this, 771 expression=self.dialect.to_json_path(path), 772 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 773 ), 774 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 775 exp.JSONBExtract, 776 this=this, 777 expression=path, 778 ), 779 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 780 exp.JSONBExtractScalar, 781 this=this, 782 expression=path, 783 ), 784 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 785 exp.JSONBContains, 786 this=this, 787 expression=key, 788 ), 789 } 790 791 EXPRESSION_PARSERS = { 792 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 793 exp.Column: lambda self: self._parse_column(), 794 exp.Condition: lambda self: self._parse_assignment(), 795 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 796 exp.Expression: lambda self: self._parse_expression(), 797 exp.From: lambda self: self._parse_from(joins=True), 798 exp.Group: lambda self: self._parse_group(), 799 exp.Having: lambda self: self._parse_having(), 800 exp.Hint: lambda self: self._parse_hint_body(), 801 exp.Identifier: lambda self: self._parse_id_var(), 802 exp.Join: lambda self: self._parse_join(), 803 exp.Lambda: lambda self: self._parse_lambda(), 804 exp.Lateral: lambda self: self._parse_lateral(), 805 exp.Limit: lambda self: self._parse_limit(), 806 exp.Offset: lambda self: self._parse_offset(), 807 exp.Order: lambda self: self._parse_order(), 808 exp.Ordered: lambda self: self._parse_ordered(), 809 exp.Properties: lambda self: self._parse_properties(), 810 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 811 exp.Qualify: lambda self: self._parse_qualify(), 812 exp.Returning: lambda self: self._parse_returning(), 813 exp.Select: lambda self: self._parse_select(), 814 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 815 exp.Table: lambda self: self._parse_table_parts(), 816 exp.TableAlias: lambda self: self._parse_table_alias(), 817 exp.Tuple: lambda self: self._parse_value(values=False), 818 exp.Whens: lambda self: self._parse_when_matched(), 819 exp.Where: lambda self: self._parse_where(), 820 exp.Window: lambda self: self._parse_named_window(), 821 exp.With: lambda self: self._parse_with(), 822 "JOIN_TYPE": lambda self: self._parse_join_parts(), 823 } 824 825 STATEMENT_PARSERS = { 826 TokenType.ALTER: lambda self: self._parse_alter(), 827 TokenType.ANALYZE: lambda self: self._parse_analyze(), 828 TokenType.BEGIN: lambda self: self._parse_transaction(), 829 TokenType.CACHE: lambda self: self._parse_cache(), 830 TokenType.COMMENT: lambda self: self._parse_comment(), 831 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 832 TokenType.COPY: lambda self: self._parse_copy(), 833 TokenType.CREATE: lambda self: self._parse_create(), 834 TokenType.DELETE: lambda self: self._parse_delete(), 835 TokenType.DESC: lambda self: self._parse_describe(), 836 TokenType.DESCRIBE: lambda self: self._parse_describe(), 837 TokenType.DROP: lambda self: self._parse_drop(), 838 TokenType.GRANT: lambda self: self._parse_grant(), 839 TokenType.INSERT: lambda self: self._parse_insert(), 840 TokenType.KILL: lambda self: self._parse_kill(), 841 TokenType.LOAD: lambda self: self._parse_load(), 842 TokenType.MERGE: lambda self: self._parse_merge(), 843 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 844 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 845 TokenType.REFRESH: lambda self: self._parse_refresh(), 846 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 847 TokenType.SET: lambda self: self._parse_set(), 848 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 849 TokenType.UNCACHE: lambda self: self._parse_uncache(), 850 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 851 TokenType.UPDATE: lambda self: self._parse_update(), 852 TokenType.USE: lambda self: self._parse_use(), 853 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 854 } 855 856 UNARY_PARSERS = { 857 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 858 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 859 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 860 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 861 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 862 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 863 } 864 865 STRING_PARSERS = { 866 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 867 exp.RawString, this=token.text 868 ), 869 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 870 exp.National, this=token.text 871 ), 872 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 873 TokenType.STRING: lambda self, token: self.expression( 874 exp.Literal, this=token.text, is_string=True 875 ), 876 TokenType.UNICODE_STRING: lambda self, token: self.expression( 877 exp.UnicodeString, 878 this=token.text, 879 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 880 ), 881 } 882 883 NUMERIC_PARSERS = { 884 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 885 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 886 TokenType.HEX_STRING: lambda self, token: self.expression( 887 exp.HexString, 888 this=token.text, 889 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 890 ), 891 TokenType.NUMBER: lambda self, token: self.expression( 892 exp.Literal, this=token.text, is_string=False 893 ), 894 } 895 896 PRIMARY_PARSERS = { 897 **STRING_PARSERS, 898 **NUMERIC_PARSERS, 899 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 900 TokenType.NULL: lambda self, _: self.expression(exp.Null), 901 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 902 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 903 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 904 TokenType.STAR: lambda self, _: self._parse_star_ops(), 905 } 906 907 PLACEHOLDER_PARSERS = { 908 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 909 TokenType.PARAMETER: lambda self: self._parse_parameter(), 910 TokenType.COLON: lambda self: ( 911 self.expression(exp.Placeholder, this=self._prev.text) 912 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 913 else None 914 ), 915 } 916 917 RANGE_PARSERS = { 918 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 919 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 920 TokenType.GLOB: binary_range_parser(exp.Glob), 921 TokenType.ILIKE: binary_range_parser(exp.ILike), 922 TokenType.IN: lambda self, this: self._parse_in(this), 923 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 924 TokenType.IS: lambda self, this: self._parse_is(this), 925 TokenType.LIKE: binary_range_parser(exp.Like), 926 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 927 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 928 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 929 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 930 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 931 } 932 933 PIPE_SYNTAX_TRANSFORM_PARSERS = { 934 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 935 "WHERE": lambda self, query: self._parse_pipe_syntax_where(query), 936 "ORDER BY": lambda self, query: query.order_by(self._parse_order(), copy=False), 937 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 938 "OFFSET": lambda self, query: query.offset(self._parse_offset(), copy=False), 939 "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query), 940 } 941 942 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 943 "ALLOWED_VALUES": lambda self: self.expression( 944 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 945 ), 946 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 947 "AUTO": lambda self: self._parse_auto_property(), 948 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 949 "BACKUP": lambda self: self.expression( 950 exp.BackupProperty, this=self._parse_var(any_token=True) 951 ), 952 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 953 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 954 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 955 "CHECKSUM": lambda self: self._parse_checksum(), 956 "CLUSTER BY": lambda self: self._parse_cluster(), 957 "CLUSTERED": lambda self: self._parse_clustered_by(), 958 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 959 exp.CollateProperty, **kwargs 960 ), 961 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 962 "CONTAINS": lambda self: self._parse_contains_property(), 963 "COPY": lambda self: self._parse_copy_property(), 964 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 965 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 966 "DEFINER": lambda self: self._parse_definer(), 967 "DETERMINISTIC": lambda self: self.expression( 968 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 969 ), 970 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 971 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 972 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 973 "DISTKEY": lambda self: self._parse_distkey(), 974 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 975 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 976 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 977 "ENVIRONMENT": lambda self: self.expression( 978 exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 979 ), 980 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 981 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 982 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 983 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 984 "FREESPACE": lambda self: self._parse_freespace(), 985 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 986 "HEAP": lambda self: self.expression(exp.HeapProperty), 987 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 988 "IMMUTABLE": lambda self: self.expression( 989 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 990 ), 991 "INHERITS": lambda self: self.expression( 992 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 993 ), 994 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 995 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 996 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 997 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 998 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 999 "LIKE": lambda self: self._parse_create_like(), 1000 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1001 "LOCK": lambda self: self._parse_locking(), 1002 "LOCKING": lambda self: self._parse_locking(), 1003 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1004 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 1005 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1006 "MODIFIES": lambda self: self._parse_modifies_property(), 1007 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1008 "NO": lambda self: self._parse_no_property(), 1009 "ON": lambda self: self._parse_on_property(), 1010 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1011 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1012 "PARTITION": lambda self: self._parse_partitioned_of(), 1013 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1014 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1015 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1016 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1017 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1018 "READS": lambda self: self._parse_reads_property(), 1019 "REMOTE": lambda self: self._parse_remote_with_connection(), 1020 "RETURNS": lambda self: self._parse_returns(), 1021 "STRICT": lambda self: self.expression(exp.StrictProperty), 1022 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1023 "ROW": lambda self: self._parse_row(), 1024 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1025 "SAMPLE": lambda self: self.expression( 1026 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1027 ), 1028 "SECURE": lambda self: self.expression(exp.SecureProperty), 1029 "SECURITY": lambda self: self._parse_security(), 1030 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1031 "SETTINGS": lambda self: self._parse_settings_property(), 1032 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1033 "SORTKEY": lambda self: self._parse_sortkey(), 1034 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1035 "STABLE": lambda self: self.expression( 1036 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1037 ), 1038 "STORED": lambda self: self._parse_stored(), 1039 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1040 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1041 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1042 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1043 "TO": lambda self: self._parse_to_table(), 1044 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1045 "TRANSFORM": lambda self: self.expression( 1046 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1047 ), 1048 "TTL": lambda self: self._parse_ttl(), 1049 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1050 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1051 "VOLATILE": lambda self: self._parse_volatile_property(), 1052 "WITH": lambda self: self._parse_with_property(), 1053 } 1054 1055 CONSTRAINT_PARSERS = { 1056 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1057 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1058 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1059 "CHARACTER SET": lambda self: self.expression( 1060 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1061 ), 1062 "CHECK": lambda self: self.expression( 1063 exp.CheckColumnConstraint, 1064 this=self._parse_wrapped(self._parse_assignment), 1065 enforced=self._match_text_seq("ENFORCED"), 1066 ), 1067 "COLLATE": lambda self: self.expression( 1068 exp.CollateColumnConstraint, 1069 this=self._parse_identifier() or self._parse_column(), 1070 ), 1071 "COMMENT": lambda self: self.expression( 1072 exp.CommentColumnConstraint, this=self._parse_string() 1073 ), 1074 "COMPRESS": lambda self: self._parse_compress(), 1075 "CLUSTERED": lambda self: self.expression( 1076 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1077 ), 1078 "NONCLUSTERED": lambda self: self.expression( 1079 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1080 ), 1081 "DEFAULT": lambda self: self.expression( 1082 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1083 ), 1084 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1085 "EPHEMERAL": lambda self: self.expression( 1086 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1087 ), 1088 "EXCLUDE": lambda self: self.expression( 1089 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1090 ), 1091 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1092 "FORMAT": lambda self: self.expression( 1093 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1094 ), 1095 "GENERATED": lambda self: self._parse_generated_as_identity(), 1096 "IDENTITY": lambda self: self._parse_auto_increment(), 1097 "INLINE": lambda self: self._parse_inline(), 1098 "LIKE": lambda self: self._parse_create_like(), 1099 "NOT": lambda self: self._parse_not_constraint(), 1100 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1101 "ON": lambda self: ( 1102 self._match(TokenType.UPDATE) 1103 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1104 ) 1105 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1106 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1107 "PERIOD": lambda self: self._parse_period_for_system_time(), 1108 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1109 "REFERENCES": lambda self: self._parse_references(match=False), 1110 "TITLE": lambda self: self.expression( 1111 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1112 ), 1113 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1114 "UNIQUE": lambda self: self._parse_unique(), 1115 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1116 "WATERMARK": lambda self: self.expression( 1117 exp.WatermarkColumnConstraint, 1118 this=self._match(TokenType.FOR) and self._parse_column(), 1119 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1120 ), 1121 "WITH": lambda self: self.expression( 1122 exp.Properties, expressions=self._parse_wrapped_properties() 1123 ), 1124 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1125 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1126 } 1127 1128 def _parse_pipe_syntax_select(self, query: exp.Query) -> exp.Query: 1129 select = self._parse_select() 1130 if isinstance(select, exp.Select): 1131 return select.from_(query.subquery(copy=False), copy=False) 1132 return query 1133 1134 def _parse_pipe_syntax_where(self, query: exp.Query) -> exp.Query: 1135 where = self._parse_where() 1136 return query.where(where, copy=False) 1137 1138 def _parse_pipe_syntax_limit(self, query: exp.Query) -> exp.Query: 1139 limit = self._parse_limit() 1140 offset = self._parse_offset() 1141 if limit: 1142 query.limit(limit, copy=False) 1143 if offset: 1144 query.offset(offset, copy=False) 1145 return query 1146 1147 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 1148 this = self._parse_assignment() 1149 if self._match_text_seq("GROUP", "AND", advance=False): 1150 return this 1151 1152 this = self._parse_alias(this) 1153 1154 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 1155 return self._parse_ordered(lambda: this) 1156 1157 return this 1158 1159 def _parse_pipe_syntax_aggregate_group_order_by( 1160 self, query: exp.Query, group_by_exists: bool = True 1161 ) -> exp.Query: 1162 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 1163 aggregates_or_groups, orders = [], [] 1164 for element in expr: 1165 if isinstance(element, exp.Ordered): 1166 this = element.this 1167 if isinstance(this, exp.Alias): 1168 element.set("this", this.args["alias"]) 1169 orders.append(element) 1170 else: 1171 this = element 1172 aggregates_or_groups.append(this) 1173 1174 if group_by_exists and isinstance(query, exp.Select): 1175 query = query.select(*aggregates_or_groups, copy=False).group_by( 1176 *[element.args.get("alias", element) for element in aggregates_or_groups], 1177 copy=False, 1178 ) 1179 else: 1180 query = exp.select(*aggregates_or_groups, copy=False).from_( 1181 query.subquery(copy=False), copy=False 1182 ) 1183 1184 if orders: 1185 return query.order_by(*orders, copy=False) 1186 1187 return query 1188 1189 def _parse_pipe_syntax_aggregate(self, query: exp.Query) -> exp.Query: 1190 self._match_text_seq("AGGREGATE") 1191 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 1192 1193 if self._match(TokenType.GROUP_BY) or ( 1194 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 1195 ): 1196 return self._parse_pipe_syntax_aggregate_group_order_by(query) 1197 1198 return query 1199 1200 def _parse_pipe_syntax_set_operator( 1201 self, query: t.Optional[exp.Query] 1202 ) -> t.Optional[exp.Query]: 1203 first_setop = self.parse_set_operation(this=query) 1204 1205 if not first_setop or not query: 1206 return None 1207 1208 first_setop.this.pop() 1209 distinct = first_setop.args.pop("distinct") 1210 1211 setops = [first_setop.expression.pop(), *self._parse_expressions()] 1212 1213 if isinstance(first_setop, exp.Union): 1214 return query.union(*setops, distinct=distinct, **first_setop.args) 1215 if isinstance(first_setop, exp.Except): 1216 return query.except_(*setops, distinct=distinct, **first_setop.args) 1217 return query.intersect(*setops, distinct=distinct, **first_setop.args) 1218 1219 def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expression: 1220 klass = ( 1221 exp.PartitionedByBucket 1222 if self._prev.text.upper() == "BUCKET" 1223 else exp.PartitionByTruncate 1224 ) 1225 1226 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1227 this, expression = seq_get(args, 0), seq_get(args, 1) 1228 1229 if isinstance(this, exp.Literal): 1230 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1231 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1232 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1233 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1234 # 1235 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1236 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1237 this, expression = expression, this 1238 1239 return self.expression(klass, this=this, expression=expression) 1240 1241 ALTER_PARSERS = { 1242 "ADD": lambda self: self._parse_alter_table_add(), 1243 "AS": lambda self: self._parse_select(), 1244 "ALTER": lambda self: self._parse_alter_table_alter(), 1245 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1246 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1247 "DROP": lambda self: self._parse_alter_table_drop(), 1248 "RENAME": lambda self: self._parse_alter_table_rename(), 1249 "SET": lambda self: self._parse_alter_table_set(), 1250 "SWAP": lambda self: self.expression( 1251 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1252 ), 1253 } 1254 1255 ALTER_ALTER_PARSERS = { 1256 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1257 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1258 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1259 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1260 } 1261 1262 SCHEMA_UNNAMED_CONSTRAINTS = { 1263 "CHECK", 1264 "EXCLUDE", 1265 "FOREIGN KEY", 1266 "LIKE", 1267 "PERIOD", 1268 "PRIMARY KEY", 1269 "UNIQUE", 1270 "WATERMARK", 1271 "BUCKET", 1272 "TRUNCATE", 1273 } 1274 1275 NO_PAREN_FUNCTION_PARSERS = { 1276 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1277 "CASE": lambda self: self._parse_case(), 1278 "CONNECT_BY_ROOT": lambda self: self.expression( 1279 exp.ConnectByRoot, this=self._parse_column() 1280 ), 1281 "IF": lambda self: self._parse_if(), 1282 } 1283 1284 INVALID_FUNC_NAME_TOKENS = { 1285 TokenType.IDENTIFIER, 1286 TokenType.STRING, 1287 } 1288 1289 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1290 1291 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1292 1293 FUNCTION_PARSERS = { 1294 **{ 1295 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1296 }, 1297 **{ 1298 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1299 }, 1300 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1301 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1302 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1303 "DECODE": lambda self: self._parse_decode(), 1304 "EXTRACT": lambda self: self._parse_extract(), 1305 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1306 "GAP_FILL": lambda self: self._parse_gap_fill(), 1307 "JSON_OBJECT": lambda self: self._parse_json_object(), 1308 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1309 "JSON_TABLE": lambda self: self._parse_json_table(), 1310 "MATCH": lambda self: self._parse_match_against(), 1311 "NORMALIZE": lambda self: self._parse_normalize(), 1312 "OPENJSON": lambda self: self._parse_open_json(), 1313 "OVERLAY": lambda self: self._parse_overlay(), 1314 "POSITION": lambda self: self._parse_position(), 1315 "PREDICT": lambda self: self._parse_predict(), 1316 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1317 "STRING_AGG": lambda self: self._parse_string_agg(), 1318 "SUBSTRING": lambda self: self._parse_substring(), 1319 "TRIM": lambda self: self._parse_trim(), 1320 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1321 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1322 "XMLELEMENT": lambda self: self.expression( 1323 exp.XMLElement, 1324 this=self._match_text_seq("NAME") and self._parse_id_var(), 1325 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1326 ), 1327 "XMLTABLE": lambda self: self._parse_xml_table(), 1328 } 1329 1330 QUERY_MODIFIER_PARSERS = { 1331 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1332 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1333 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1334 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1335 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1336 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1337 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1338 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1339 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1340 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1341 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1342 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1343 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1344 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1345 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1346 TokenType.CLUSTER_BY: lambda self: ( 1347 "cluster", 1348 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1349 ), 1350 TokenType.DISTRIBUTE_BY: lambda self: ( 1351 "distribute", 1352 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1353 ), 1354 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1355 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1356 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1357 } 1358 1359 SET_PARSERS = { 1360 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1361 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1362 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1363 "TRANSACTION": lambda self: self._parse_set_transaction(), 1364 } 1365 1366 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1367 1368 TYPE_LITERAL_PARSERS = { 1369 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1370 } 1371 1372 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1373 1374 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1375 1376 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1377 1378 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1379 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1380 "ISOLATION": ( 1381 ("LEVEL", "REPEATABLE", "READ"), 1382 ("LEVEL", "READ", "COMMITTED"), 1383 ("LEVEL", "READ", "UNCOMITTED"), 1384 ("LEVEL", "SERIALIZABLE"), 1385 ), 1386 "READ": ("WRITE", "ONLY"), 1387 } 1388 1389 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1390 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1391 ) 1392 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1393 1394 CREATE_SEQUENCE: OPTIONS_TYPE = { 1395 "SCALE": ("EXTEND", "NOEXTEND"), 1396 "SHARD": ("EXTEND", "NOEXTEND"), 1397 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1398 **dict.fromkeys( 1399 ( 1400 "SESSION", 1401 "GLOBAL", 1402 "KEEP", 1403 "NOKEEP", 1404 "ORDER", 1405 "NOORDER", 1406 "NOCACHE", 1407 "CYCLE", 1408 "NOCYCLE", 1409 "NOMINVALUE", 1410 "NOMAXVALUE", 1411 "NOSCALE", 1412 "NOSHARD", 1413 ), 1414 tuple(), 1415 ), 1416 } 1417 1418 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1419 1420 USABLES: OPTIONS_TYPE = dict.fromkeys( 1421 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1422 ) 1423 1424 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1425 1426 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1427 "TYPE": ("EVOLUTION",), 1428 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1429 } 1430 1431 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1432 1433 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1434 1435 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1436 "NOT": ("ENFORCED",), 1437 "MATCH": ( 1438 "FULL", 1439 "PARTIAL", 1440 "SIMPLE", 1441 ), 1442 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1443 "USING": ( 1444 "BTREE", 1445 "HASH", 1446 ), 1447 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1448 } 1449 1450 WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = { 1451 "NO": ("OTHERS",), 1452 "CURRENT": ("ROW",), 1453 **dict.fromkeys(("GROUP", "TIES"), tuple()), 1454 } 1455 1456 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1457 1458 CLONE_KEYWORDS = {"CLONE", "COPY"} 1459 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1460 HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"} 1461 1462 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1463 1464 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1465 1466 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1467 1468 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1469 1470 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1471 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1472 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1473 1474 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1475 1476 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1477 1478 ADD_CONSTRAINT_TOKENS = { 1479 TokenType.CONSTRAINT, 1480 TokenType.FOREIGN_KEY, 1481 TokenType.INDEX, 1482 TokenType.KEY, 1483 TokenType.PRIMARY_KEY, 1484 TokenType.UNIQUE, 1485 } 1486 1487 DISTINCT_TOKENS = {TokenType.DISTINCT} 1488 1489 NULL_TOKENS = {TokenType.NULL} 1490 1491 UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1492 1493 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1494 1495 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1496 1497 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1498 1499 ODBC_DATETIME_LITERALS = { 1500 "d": exp.Date, 1501 "t": exp.Time, 1502 "ts": exp.Timestamp, 1503 } 1504 1505 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1506 1507 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1508 1509 # The style options for the DESCRIBE statement 1510 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1511 1512 # The style options for the ANALYZE statement 1513 ANALYZE_STYLES = { 1514 "BUFFER_USAGE_LIMIT", 1515 "FULL", 1516 "LOCAL", 1517 "NO_WRITE_TO_BINLOG", 1518 "SAMPLE", 1519 "SKIP_LOCKED", 1520 "VERBOSE", 1521 } 1522 1523 ANALYZE_EXPRESSION_PARSERS = { 1524 "ALL": lambda self: self._parse_analyze_columns(), 1525 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1526 "DELETE": lambda self: self._parse_analyze_delete(), 1527 "DROP": lambda self: self._parse_analyze_histogram(), 1528 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1529 "LIST": lambda self: self._parse_analyze_list(), 1530 "PREDICATE": lambda self: self._parse_analyze_columns(), 1531 "UPDATE": lambda self: self._parse_analyze_histogram(), 1532 "VALIDATE": lambda self: self._parse_analyze_validate(), 1533 } 1534 1535 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1536 1537 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1538 1539 OPERATION_MODIFIERS: t.Set[str] = set() 1540 1541 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1542 1543 MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows) 1544 1545 STRICT_CAST = True 1546 1547 PREFIXED_PIVOT_COLUMNS = False 1548 IDENTIFY_PIVOT_STRINGS = False 1549 1550 LOG_DEFAULTS_TO_LN = False 1551 1552 # Whether the table sample clause expects CSV syntax 1553 TABLESAMPLE_CSV = False 1554 1555 # The default method used for table sampling 1556 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1557 1558 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1559 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1560 1561 # Whether the TRIM function expects the characters to trim as its first argument 1562 TRIM_PATTERN_FIRST = False 1563 1564 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1565 STRING_ALIASES = False 1566 1567 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1568 MODIFIERS_ATTACHED_TO_SET_OP = True 1569 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1570 1571 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1572 NO_PAREN_IF_COMMANDS = True 1573 1574 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1575 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1576 1577 # Whether the `:` operator is used to extract a value from a VARIANT column 1578 COLON_IS_VARIANT_EXTRACT = False 1579 1580 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1581 # If this is True and '(' is not found, the keyword will be treated as an identifier 1582 VALUES_FOLLOWED_BY_PAREN = True 1583 1584 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1585 SUPPORTS_IMPLICIT_UNNEST = False 1586 1587 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1588 INTERVAL_SPANS = True 1589 1590 # Whether a PARTITION clause can follow a table reference 1591 SUPPORTS_PARTITION_SELECTION = False 1592 1593 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1594 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1595 1596 # Whether the 'AS' keyword is optional in the CTE definition syntax 1597 OPTIONAL_ALIAS_TOKEN_CTE = True 1598 1599 __slots__ = ( 1600 "error_level", 1601 "error_message_context", 1602 "max_errors", 1603 "dialect", 1604 "sql", 1605 "errors", 1606 "_tokens", 1607 "_index", 1608 "_curr", 1609 "_next", 1610 "_prev", 1611 "_prev_comments", 1612 ) 1613 1614 # Autofilled 1615 SHOW_TRIE: t.Dict = {} 1616 SET_TRIE: t.Dict = {} 1617 1618 def __init__( 1619 self, 1620 error_level: t.Optional[ErrorLevel] = None, 1621 error_message_context: int = 100, 1622 max_errors: int = 3, 1623 dialect: DialectType = None, 1624 ): 1625 from sqlglot.dialects import Dialect 1626 1627 self.error_level = error_level or ErrorLevel.IMMEDIATE 1628 self.error_message_context = error_message_context 1629 self.max_errors = max_errors 1630 self.dialect = Dialect.get_or_raise(dialect) 1631 self.reset() 1632 1633 def reset(self): 1634 self.sql = "" 1635 self.errors = [] 1636 self._tokens = [] 1637 self._index = 0 1638 self._curr = None 1639 self._next = None 1640 self._prev = None 1641 self._prev_comments = None 1642 1643 def parse( 1644 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1645 ) -> t.List[t.Optional[exp.Expression]]: 1646 """ 1647 Parses a list of tokens and returns a list of syntax trees, one tree 1648 per parsed SQL statement. 1649 1650 Args: 1651 raw_tokens: The list of tokens. 1652 sql: The original SQL string, used to produce helpful debug messages. 1653 1654 Returns: 1655 The list of the produced syntax trees. 1656 """ 1657 return self._parse( 1658 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1659 ) 1660 1661 def parse_into( 1662 self, 1663 expression_types: exp.IntoType, 1664 raw_tokens: t.List[Token], 1665 sql: t.Optional[str] = None, 1666 ) -> t.List[t.Optional[exp.Expression]]: 1667 """ 1668 Parses a list of tokens into a given Expression type. If a collection of Expression 1669 types is given instead, this method will try to parse the token list into each one 1670 of them, stopping at the first for which the parsing succeeds. 1671 1672 Args: 1673 expression_types: The expression type(s) to try and parse the token list into. 1674 raw_tokens: The list of tokens. 1675 sql: The original SQL string, used to produce helpful debug messages. 1676 1677 Returns: 1678 The target Expression. 1679 """ 1680 errors = [] 1681 for expression_type in ensure_list(expression_types): 1682 parser = self.EXPRESSION_PARSERS.get(expression_type) 1683 if not parser: 1684 raise TypeError(f"No parser registered for {expression_type}") 1685 1686 try: 1687 return self._parse(parser, raw_tokens, sql) 1688 except ParseError as e: 1689 e.errors[0]["into_expression"] = expression_type 1690 errors.append(e) 1691 1692 raise ParseError( 1693 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1694 errors=merge_errors(errors), 1695 ) from errors[-1] 1696 1697 def _parse( 1698 self, 1699 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1700 raw_tokens: t.List[Token], 1701 sql: t.Optional[str] = None, 1702 ) -> t.List[t.Optional[exp.Expression]]: 1703 self.reset() 1704 self.sql = sql or "" 1705 1706 total = len(raw_tokens) 1707 chunks: t.List[t.List[Token]] = [[]] 1708 1709 for i, token in enumerate(raw_tokens): 1710 if token.token_type == TokenType.SEMICOLON: 1711 if token.comments: 1712 chunks.append([token]) 1713 1714 if i < total - 1: 1715 chunks.append([]) 1716 else: 1717 chunks[-1].append(token) 1718 1719 expressions = [] 1720 1721 for tokens in chunks: 1722 self._index = -1 1723 self._tokens = tokens 1724 self._advance() 1725 1726 expressions.append(parse_method(self)) 1727 1728 if self._index < len(self._tokens): 1729 self.raise_error("Invalid expression / Unexpected token") 1730 1731 self.check_errors() 1732 1733 return expressions 1734 1735 def check_errors(self) -> None: 1736 """Logs or raises any found errors, depending on the chosen error level setting.""" 1737 if self.error_level == ErrorLevel.WARN: 1738 for error in self.errors: 1739 logger.error(str(error)) 1740 elif self.error_level == ErrorLevel.RAISE and self.errors: 1741 raise ParseError( 1742 concat_messages(self.errors, self.max_errors), 1743 errors=merge_errors(self.errors), 1744 ) 1745 1746 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1747 """ 1748 Appends an error in the list of recorded errors or raises it, depending on the chosen 1749 error level setting. 1750 """ 1751 token = token or self._curr or self._prev or Token.string("") 1752 start = token.start 1753 end = token.end + 1 1754 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1755 highlight = self.sql[start:end] 1756 end_context = self.sql[end : end + self.error_message_context] 1757 1758 error = ParseError.new( 1759 f"{message}. Line {token.line}, Col: {token.col}.\n" 1760 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1761 description=message, 1762 line=token.line, 1763 col=token.col, 1764 start_context=start_context, 1765 highlight=highlight, 1766 end_context=end_context, 1767 ) 1768 1769 if self.error_level == ErrorLevel.IMMEDIATE: 1770 raise error 1771 1772 self.errors.append(error) 1773 1774 def expression( 1775 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1776 ) -> E: 1777 """ 1778 Creates a new, validated Expression. 1779 1780 Args: 1781 exp_class: The expression class to instantiate. 1782 comments: An optional list of comments to attach to the expression. 1783 kwargs: The arguments to set for the expression along with their respective values. 1784 1785 Returns: 1786 The target expression. 1787 """ 1788 instance = exp_class(**kwargs) 1789 instance.add_comments(comments) if comments else self._add_comments(instance) 1790 return self.validate_expression(instance) 1791 1792 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1793 if expression and self._prev_comments: 1794 expression.add_comments(self._prev_comments) 1795 self._prev_comments = None 1796 1797 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1798 """ 1799 Validates an Expression, making sure that all its mandatory arguments are set. 1800 1801 Args: 1802 expression: The expression to validate. 1803 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1804 1805 Returns: 1806 The validated expression. 1807 """ 1808 if self.error_level != ErrorLevel.IGNORE: 1809 for error_message in expression.error_messages(args): 1810 self.raise_error(error_message) 1811 1812 return expression 1813 1814 def _find_sql(self, start: Token, end: Token) -> str: 1815 return self.sql[start.start : end.end + 1] 1816 1817 def _is_connected(self) -> bool: 1818 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1819 1820 def _advance(self, times: int = 1) -> None: 1821 self._index += times 1822 self._curr = seq_get(self._tokens, self._index) 1823 self._next = seq_get(self._tokens, self._index + 1) 1824 1825 if self._index > 0: 1826 self._prev = self._tokens[self._index - 1] 1827 self._prev_comments = self._prev.comments 1828 else: 1829 self._prev = None 1830 self._prev_comments = None 1831 1832 def _retreat(self, index: int) -> None: 1833 if index != self._index: 1834 self._advance(index - self._index) 1835 1836 def _warn_unsupported(self) -> None: 1837 if len(self._tokens) <= 1: 1838 return 1839 1840 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1841 # interested in emitting a warning for the one being currently processed. 1842 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1843 1844 logger.warning( 1845 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1846 ) 1847 1848 def _parse_command(self) -> exp.Command: 1849 self._warn_unsupported() 1850 return self.expression( 1851 exp.Command, 1852 comments=self._prev_comments, 1853 this=self._prev.text.upper(), 1854 expression=self._parse_string(), 1855 ) 1856 1857 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1858 """ 1859 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 1860 This behavior can be different depending on the uset-set ErrorLevel, so _try_parse aims to 1861 solve this by setting & resetting the parser state accordingly 1862 """ 1863 index = self._index 1864 error_level = self.error_level 1865 1866 self.error_level = ErrorLevel.IMMEDIATE 1867 try: 1868 this = parse_method() 1869 except ParseError: 1870 this = None 1871 finally: 1872 if not this or retreat: 1873 self._retreat(index) 1874 self.error_level = error_level 1875 1876 return this 1877 1878 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1879 start = self._prev 1880 exists = self._parse_exists() if allow_exists else None 1881 1882 self._match(TokenType.ON) 1883 1884 materialized = self._match_text_seq("MATERIALIZED") 1885 kind = self._match_set(self.CREATABLES) and self._prev 1886 if not kind: 1887 return self._parse_as_command(start) 1888 1889 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1890 this = self._parse_user_defined_function(kind=kind.token_type) 1891 elif kind.token_type == TokenType.TABLE: 1892 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1893 elif kind.token_type == TokenType.COLUMN: 1894 this = self._parse_column() 1895 else: 1896 this = self._parse_id_var() 1897 1898 self._match(TokenType.IS) 1899 1900 return self.expression( 1901 exp.Comment, 1902 this=this, 1903 kind=kind.text, 1904 expression=self._parse_string(), 1905 exists=exists, 1906 materialized=materialized, 1907 ) 1908 1909 def _parse_to_table( 1910 self, 1911 ) -> exp.ToTableProperty: 1912 table = self._parse_table_parts(schema=True) 1913 return self.expression(exp.ToTableProperty, this=table) 1914 1915 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1916 def _parse_ttl(self) -> exp.Expression: 1917 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1918 this = self._parse_bitwise() 1919 1920 if self._match_text_seq("DELETE"): 1921 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1922 if self._match_text_seq("RECOMPRESS"): 1923 return self.expression( 1924 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1925 ) 1926 if self._match_text_seq("TO", "DISK"): 1927 return self.expression( 1928 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1929 ) 1930 if self._match_text_seq("TO", "VOLUME"): 1931 return self.expression( 1932 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1933 ) 1934 1935 return this 1936 1937 expressions = self._parse_csv(_parse_ttl_action) 1938 where = self._parse_where() 1939 group = self._parse_group() 1940 1941 aggregates = None 1942 if group and self._match(TokenType.SET): 1943 aggregates = self._parse_csv(self._parse_set_item) 1944 1945 return self.expression( 1946 exp.MergeTreeTTL, 1947 expressions=expressions, 1948 where=where, 1949 group=group, 1950 aggregates=aggregates, 1951 ) 1952 1953 def _parse_statement(self) -> t.Optional[exp.Expression]: 1954 if self._curr is None: 1955 return None 1956 1957 if self._match_set(self.STATEMENT_PARSERS): 1958 comments = self._prev_comments 1959 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1960 stmt.add_comments(comments, prepend=True) 1961 return stmt 1962 1963 if self._match_set(self.dialect.tokenizer.COMMANDS): 1964 return self._parse_command() 1965 1966 expression = self._parse_expression() 1967 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1968 return self._parse_query_modifiers(expression) 1969 1970 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1971 start = self._prev 1972 temporary = self._match(TokenType.TEMPORARY) 1973 materialized = self._match_text_seq("MATERIALIZED") 1974 1975 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1976 if not kind: 1977 return self._parse_as_command(start) 1978 1979 concurrently = self._match_text_seq("CONCURRENTLY") 1980 if_exists = exists or self._parse_exists() 1981 1982 if kind == "COLUMN": 1983 this = self._parse_column() 1984 else: 1985 this = self._parse_table_parts( 1986 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1987 ) 1988 1989 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1990 1991 if self._match(TokenType.L_PAREN, advance=False): 1992 expressions = self._parse_wrapped_csv(self._parse_types) 1993 else: 1994 expressions = None 1995 1996 return self.expression( 1997 exp.Drop, 1998 exists=if_exists, 1999 this=this, 2000 expressions=expressions, 2001 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 2002 temporary=temporary, 2003 materialized=materialized, 2004 cascade=self._match_text_seq("CASCADE"), 2005 constraints=self._match_text_seq("CONSTRAINTS"), 2006 purge=self._match_text_seq("PURGE"), 2007 cluster=cluster, 2008 concurrently=concurrently, 2009 ) 2010 2011 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 2012 return ( 2013 self._match_text_seq("IF") 2014 and (not not_ or self._match(TokenType.NOT)) 2015 and self._match(TokenType.EXISTS) 2016 ) 2017 2018 def _parse_create(self) -> exp.Create | exp.Command: 2019 # Note: this can't be None because we've matched a statement parser 2020 start = self._prev 2021 2022 replace = ( 2023 start.token_type == TokenType.REPLACE 2024 or self._match_pair(TokenType.OR, TokenType.REPLACE) 2025 or self._match_pair(TokenType.OR, TokenType.ALTER) 2026 ) 2027 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 2028 2029 unique = self._match(TokenType.UNIQUE) 2030 2031 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 2032 clustered = True 2033 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 2034 "COLUMNSTORE" 2035 ): 2036 clustered = False 2037 else: 2038 clustered = None 2039 2040 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 2041 self._advance() 2042 2043 properties = None 2044 create_token = self._match_set(self.CREATABLES) and self._prev 2045 2046 if not create_token: 2047 # exp.Properties.Location.POST_CREATE 2048 properties = self._parse_properties() 2049 create_token = self._match_set(self.CREATABLES) and self._prev 2050 2051 if not properties or not create_token: 2052 return self._parse_as_command(start) 2053 2054 concurrently = self._match_text_seq("CONCURRENTLY") 2055 exists = self._parse_exists(not_=True) 2056 this = None 2057 expression: t.Optional[exp.Expression] = None 2058 indexes = None 2059 no_schema_binding = None 2060 begin = None 2061 end = None 2062 clone = None 2063 2064 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 2065 nonlocal properties 2066 if properties and temp_props: 2067 properties.expressions.extend(temp_props.expressions) 2068 elif temp_props: 2069 properties = temp_props 2070 2071 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2072 this = self._parse_user_defined_function(kind=create_token.token_type) 2073 2074 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 2075 extend_props(self._parse_properties()) 2076 2077 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 2078 extend_props(self._parse_properties()) 2079 2080 if not expression: 2081 if self._match(TokenType.COMMAND): 2082 expression = self._parse_as_command(self._prev) 2083 else: 2084 begin = self._match(TokenType.BEGIN) 2085 return_ = self._match_text_seq("RETURN") 2086 2087 if self._match(TokenType.STRING, advance=False): 2088 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 2089 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 2090 expression = self._parse_string() 2091 extend_props(self._parse_properties()) 2092 else: 2093 expression = self._parse_user_defined_function_expression() 2094 2095 end = self._match_text_seq("END") 2096 2097 if return_: 2098 expression = self.expression(exp.Return, this=expression) 2099 elif create_token.token_type == TokenType.INDEX: 2100 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 2101 if not self._match(TokenType.ON): 2102 index = self._parse_id_var() 2103 anonymous = False 2104 else: 2105 index = None 2106 anonymous = True 2107 2108 this = self._parse_index(index=index, anonymous=anonymous) 2109 elif create_token.token_type in self.DB_CREATABLES: 2110 table_parts = self._parse_table_parts( 2111 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2112 ) 2113 2114 # exp.Properties.Location.POST_NAME 2115 self._match(TokenType.COMMA) 2116 extend_props(self._parse_properties(before=True)) 2117 2118 this = self._parse_schema(this=table_parts) 2119 2120 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2121 extend_props(self._parse_properties()) 2122 2123 has_alias = self._match(TokenType.ALIAS) 2124 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2125 # exp.Properties.Location.POST_ALIAS 2126 extend_props(self._parse_properties()) 2127 2128 if create_token.token_type == TokenType.SEQUENCE: 2129 expression = self._parse_types() 2130 extend_props(self._parse_properties()) 2131 else: 2132 expression = self._parse_ddl_select() 2133 2134 # Some dialects also support using a table as an alias instead of a SELECT. 2135 # Here we fallback to this as an alternative. 2136 if not expression and has_alias: 2137 expression = self._try_parse(self._parse_table_parts) 2138 2139 if create_token.token_type == TokenType.TABLE: 2140 # exp.Properties.Location.POST_EXPRESSION 2141 extend_props(self._parse_properties()) 2142 2143 indexes = [] 2144 while True: 2145 index = self._parse_index() 2146 2147 # exp.Properties.Location.POST_INDEX 2148 extend_props(self._parse_properties()) 2149 if not index: 2150 break 2151 else: 2152 self._match(TokenType.COMMA) 2153 indexes.append(index) 2154 elif create_token.token_type == TokenType.VIEW: 2155 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2156 no_schema_binding = True 2157 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2158 extend_props(self._parse_properties()) 2159 2160 shallow = self._match_text_seq("SHALLOW") 2161 2162 if self._match_texts(self.CLONE_KEYWORDS): 2163 copy = self._prev.text.lower() == "copy" 2164 clone = self.expression( 2165 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2166 ) 2167 2168 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2169 return self._parse_as_command(start) 2170 2171 create_kind_text = create_token.text.upper() 2172 return self.expression( 2173 exp.Create, 2174 this=this, 2175 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2176 replace=replace, 2177 refresh=refresh, 2178 unique=unique, 2179 expression=expression, 2180 exists=exists, 2181 properties=properties, 2182 indexes=indexes, 2183 no_schema_binding=no_schema_binding, 2184 begin=begin, 2185 end=end, 2186 clone=clone, 2187 concurrently=concurrently, 2188 clustered=clustered, 2189 ) 2190 2191 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2192 seq = exp.SequenceProperties() 2193 2194 options = [] 2195 index = self._index 2196 2197 while self._curr: 2198 self._match(TokenType.COMMA) 2199 if self._match_text_seq("INCREMENT"): 2200 self._match_text_seq("BY") 2201 self._match_text_seq("=") 2202 seq.set("increment", self._parse_term()) 2203 elif self._match_text_seq("MINVALUE"): 2204 seq.set("minvalue", self._parse_term()) 2205 elif self._match_text_seq("MAXVALUE"): 2206 seq.set("maxvalue", self._parse_term()) 2207 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2208 self._match_text_seq("=") 2209 seq.set("start", self._parse_term()) 2210 elif self._match_text_seq("CACHE"): 2211 # T-SQL allows empty CACHE which is initialized dynamically 2212 seq.set("cache", self._parse_number() or True) 2213 elif self._match_text_seq("OWNED", "BY"): 2214 # "OWNED BY NONE" is the default 2215 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2216 else: 2217 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2218 if opt: 2219 options.append(opt) 2220 else: 2221 break 2222 2223 seq.set("options", options if options else None) 2224 return None if self._index == index else seq 2225 2226 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2227 # only used for teradata currently 2228 self._match(TokenType.COMMA) 2229 2230 kwargs = { 2231 "no": self._match_text_seq("NO"), 2232 "dual": self._match_text_seq("DUAL"), 2233 "before": self._match_text_seq("BEFORE"), 2234 "default": self._match_text_seq("DEFAULT"), 2235 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2236 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2237 "after": self._match_text_seq("AFTER"), 2238 "minimum": self._match_texts(("MIN", "MINIMUM")), 2239 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2240 } 2241 2242 if self._match_texts(self.PROPERTY_PARSERS): 2243 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2244 try: 2245 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2246 except TypeError: 2247 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2248 2249 return None 2250 2251 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2252 return self._parse_wrapped_csv(self._parse_property) 2253 2254 def _parse_property(self) -> t.Optional[exp.Expression]: 2255 if self._match_texts(self.PROPERTY_PARSERS): 2256 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2257 2258 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2259 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2260 2261 if self._match_text_seq("COMPOUND", "SORTKEY"): 2262 return self._parse_sortkey(compound=True) 2263 2264 if self._match_text_seq("SQL", "SECURITY"): 2265 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2266 2267 index = self._index 2268 key = self._parse_column() 2269 2270 if not self._match(TokenType.EQ): 2271 self._retreat(index) 2272 return self._parse_sequence_properties() 2273 2274 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2275 if isinstance(key, exp.Column): 2276 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2277 2278 value = self._parse_bitwise() or self._parse_var(any_token=True) 2279 2280 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2281 if isinstance(value, exp.Column): 2282 value = exp.var(value.name) 2283 2284 return self.expression(exp.Property, this=key, value=value) 2285 2286 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2287 if self._match_text_seq("BY"): 2288 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2289 2290 self._match(TokenType.ALIAS) 2291 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2292 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2293 2294 return self.expression( 2295 exp.FileFormatProperty, 2296 this=( 2297 self.expression( 2298 exp.InputOutputFormat, 2299 input_format=input_format, 2300 output_format=output_format, 2301 ) 2302 if input_format or output_format 2303 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2304 ), 2305 ) 2306 2307 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2308 field = self._parse_field() 2309 if isinstance(field, exp.Identifier) and not field.quoted: 2310 field = exp.var(field) 2311 2312 return field 2313 2314 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2315 self._match(TokenType.EQ) 2316 self._match(TokenType.ALIAS) 2317 2318 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2319 2320 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2321 properties = [] 2322 while True: 2323 if before: 2324 prop = self._parse_property_before() 2325 else: 2326 prop = self._parse_property() 2327 if not prop: 2328 break 2329 for p in ensure_list(prop): 2330 properties.append(p) 2331 2332 if properties: 2333 return self.expression(exp.Properties, expressions=properties) 2334 2335 return None 2336 2337 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2338 return self.expression( 2339 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2340 ) 2341 2342 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2343 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2344 security_specifier = self._prev.text.upper() 2345 return self.expression(exp.SecurityProperty, this=security_specifier) 2346 return None 2347 2348 def _parse_settings_property(self) -> exp.SettingsProperty: 2349 return self.expression( 2350 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2351 ) 2352 2353 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2354 if self._index >= 2: 2355 pre_volatile_token = self._tokens[self._index - 2] 2356 else: 2357 pre_volatile_token = None 2358 2359 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2360 return exp.VolatileProperty() 2361 2362 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2363 2364 def _parse_retention_period(self) -> exp.Var: 2365 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2366 number = self._parse_number() 2367 number_str = f"{number} " if number else "" 2368 unit = self._parse_var(any_token=True) 2369 return exp.var(f"{number_str}{unit}") 2370 2371 def _parse_system_versioning_property( 2372 self, with_: bool = False 2373 ) -> exp.WithSystemVersioningProperty: 2374 self._match(TokenType.EQ) 2375 prop = self.expression( 2376 exp.WithSystemVersioningProperty, 2377 **{ # type: ignore 2378 "on": True, 2379 "with": with_, 2380 }, 2381 ) 2382 2383 if self._match_text_seq("OFF"): 2384 prop.set("on", False) 2385 return prop 2386 2387 self._match(TokenType.ON) 2388 if self._match(TokenType.L_PAREN): 2389 while self._curr and not self._match(TokenType.R_PAREN): 2390 if self._match_text_seq("HISTORY_TABLE", "="): 2391 prop.set("this", self._parse_table_parts()) 2392 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2393 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2394 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2395 prop.set("retention_period", self._parse_retention_period()) 2396 2397 self._match(TokenType.COMMA) 2398 2399 return prop 2400 2401 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2402 self._match(TokenType.EQ) 2403 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2404 prop = self.expression(exp.DataDeletionProperty, on=on) 2405 2406 if self._match(TokenType.L_PAREN): 2407 while self._curr and not self._match(TokenType.R_PAREN): 2408 if self._match_text_seq("FILTER_COLUMN", "="): 2409 prop.set("filter_column", self._parse_column()) 2410 elif self._match_text_seq("RETENTION_PERIOD", "="): 2411 prop.set("retention_period", self._parse_retention_period()) 2412 2413 self._match(TokenType.COMMA) 2414 2415 return prop 2416 2417 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2418 kind = "HASH" 2419 expressions: t.Optional[t.List[exp.Expression]] = None 2420 if self._match_text_seq("BY", "HASH"): 2421 expressions = self._parse_wrapped_csv(self._parse_id_var) 2422 elif self._match_text_seq("BY", "RANDOM"): 2423 kind = "RANDOM" 2424 2425 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2426 buckets: t.Optional[exp.Expression] = None 2427 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2428 buckets = self._parse_number() 2429 2430 return self.expression( 2431 exp.DistributedByProperty, 2432 expressions=expressions, 2433 kind=kind, 2434 buckets=buckets, 2435 order=self._parse_order(), 2436 ) 2437 2438 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2439 self._match_text_seq("KEY") 2440 expressions = self._parse_wrapped_id_vars() 2441 return self.expression(expr_type, expressions=expressions) 2442 2443 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2444 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2445 prop = self._parse_system_versioning_property(with_=True) 2446 self._match_r_paren() 2447 return prop 2448 2449 if self._match(TokenType.L_PAREN, advance=False): 2450 return self._parse_wrapped_properties() 2451 2452 if self._match_text_seq("JOURNAL"): 2453 return self._parse_withjournaltable() 2454 2455 if self._match_texts(self.VIEW_ATTRIBUTES): 2456 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2457 2458 if self._match_text_seq("DATA"): 2459 return self._parse_withdata(no=False) 2460 elif self._match_text_seq("NO", "DATA"): 2461 return self._parse_withdata(no=True) 2462 2463 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2464 return self._parse_serde_properties(with_=True) 2465 2466 if self._match(TokenType.SCHEMA): 2467 return self.expression( 2468 exp.WithSchemaBindingProperty, 2469 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2470 ) 2471 2472 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2473 return self.expression( 2474 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2475 ) 2476 2477 if not self._next: 2478 return None 2479 2480 return self._parse_withisolatedloading() 2481 2482 def _parse_procedure_option(self) -> exp.Expression | None: 2483 if self._match_text_seq("EXECUTE", "AS"): 2484 return self.expression( 2485 exp.ExecuteAsProperty, 2486 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2487 or self._parse_string(), 2488 ) 2489 2490 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2491 2492 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2493 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2494 self._match(TokenType.EQ) 2495 2496 user = self._parse_id_var() 2497 self._match(TokenType.PARAMETER) 2498 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2499 2500 if not user or not host: 2501 return None 2502 2503 return exp.DefinerProperty(this=f"{user}@{host}") 2504 2505 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2506 self._match(TokenType.TABLE) 2507 self._match(TokenType.EQ) 2508 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2509 2510 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2511 return self.expression(exp.LogProperty, no=no) 2512 2513 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2514 return self.expression(exp.JournalProperty, **kwargs) 2515 2516 def _parse_checksum(self) -> exp.ChecksumProperty: 2517 self._match(TokenType.EQ) 2518 2519 on = None 2520 if self._match(TokenType.ON): 2521 on = True 2522 elif self._match_text_seq("OFF"): 2523 on = False 2524 2525 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2526 2527 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2528 return self.expression( 2529 exp.Cluster, 2530 expressions=( 2531 self._parse_wrapped_csv(self._parse_ordered) 2532 if wrapped 2533 else self._parse_csv(self._parse_ordered) 2534 ), 2535 ) 2536 2537 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2538 self._match_text_seq("BY") 2539 2540 self._match_l_paren() 2541 expressions = self._parse_csv(self._parse_column) 2542 self._match_r_paren() 2543 2544 if self._match_text_seq("SORTED", "BY"): 2545 self._match_l_paren() 2546 sorted_by = self._parse_csv(self._parse_ordered) 2547 self._match_r_paren() 2548 else: 2549 sorted_by = None 2550 2551 self._match(TokenType.INTO) 2552 buckets = self._parse_number() 2553 self._match_text_seq("BUCKETS") 2554 2555 return self.expression( 2556 exp.ClusteredByProperty, 2557 expressions=expressions, 2558 sorted_by=sorted_by, 2559 buckets=buckets, 2560 ) 2561 2562 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2563 if not self._match_text_seq("GRANTS"): 2564 self._retreat(self._index - 1) 2565 return None 2566 2567 return self.expression(exp.CopyGrantsProperty) 2568 2569 def _parse_freespace(self) -> exp.FreespaceProperty: 2570 self._match(TokenType.EQ) 2571 return self.expression( 2572 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2573 ) 2574 2575 def _parse_mergeblockratio( 2576 self, no: bool = False, default: bool = False 2577 ) -> exp.MergeBlockRatioProperty: 2578 if self._match(TokenType.EQ): 2579 return self.expression( 2580 exp.MergeBlockRatioProperty, 2581 this=self._parse_number(), 2582 percent=self._match(TokenType.PERCENT), 2583 ) 2584 2585 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2586 2587 def _parse_datablocksize( 2588 self, 2589 default: t.Optional[bool] = None, 2590 minimum: t.Optional[bool] = None, 2591 maximum: t.Optional[bool] = None, 2592 ) -> exp.DataBlocksizeProperty: 2593 self._match(TokenType.EQ) 2594 size = self._parse_number() 2595 2596 units = None 2597 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2598 units = self._prev.text 2599 2600 return self.expression( 2601 exp.DataBlocksizeProperty, 2602 size=size, 2603 units=units, 2604 default=default, 2605 minimum=minimum, 2606 maximum=maximum, 2607 ) 2608 2609 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2610 self._match(TokenType.EQ) 2611 always = self._match_text_seq("ALWAYS") 2612 manual = self._match_text_seq("MANUAL") 2613 never = self._match_text_seq("NEVER") 2614 default = self._match_text_seq("DEFAULT") 2615 2616 autotemp = None 2617 if self._match_text_seq("AUTOTEMP"): 2618 autotemp = self._parse_schema() 2619 2620 return self.expression( 2621 exp.BlockCompressionProperty, 2622 always=always, 2623 manual=manual, 2624 never=never, 2625 default=default, 2626 autotemp=autotemp, 2627 ) 2628 2629 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2630 index = self._index 2631 no = self._match_text_seq("NO") 2632 concurrent = self._match_text_seq("CONCURRENT") 2633 2634 if not self._match_text_seq("ISOLATED", "LOADING"): 2635 self._retreat(index) 2636 return None 2637 2638 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2639 return self.expression( 2640 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2641 ) 2642 2643 def _parse_locking(self) -> exp.LockingProperty: 2644 if self._match(TokenType.TABLE): 2645 kind = "TABLE" 2646 elif self._match(TokenType.VIEW): 2647 kind = "VIEW" 2648 elif self._match(TokenType.ROW): 2649 kind = "ROW" 2650 elif self._match_text_seq("DATABASE"): 2651 kind = "DATABASE" 2652 else: 2653 kind = None 2654 2655 if kind in ("DATABASE", "TABLE", "VIEW"): 2656 this = self._parse_table_parts() 2657 else: 2658 this = None 2659 2660 if self._match(TokenType.FOR): 2661 for_or_in = "FOR" 2662 elif self._match(TokenType.IN): 2663 for_or_in = "IN" 2664 else: 2665 for_or_in = None 2666 2667 if self._match_text_seq("ACCESS"): 2668 lock_type = "ACCESS" 2669 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2670 lock_type = "EXCLUSIVE" 2671 elif self._match_text_seq("SHARE"): 2672 lock_type = "SHARE" 2673 elif self._match_text_seq("READ"): 2674 lock_type = "READ" 2675 elif self._match_text_seq("WRITE"): 2676 lock_type = "WRITE" 2677 elif self._match_text_seq("CHECKSUM"): 2678 lock_type = "CHECKSUM" 2679 else: 2680 lock_type = None 2681 2682 override = self._match_text_seq("OVERRIDE") 2683 2684 return self.expression( 2685 exp.LockingProperty, 2686 this=this, 2687 kind=kind, 2688 for_or_in=for_or_in, 2689 lock_type=lock_type, 2690 override=override, 2691 ) 2692 2693 def _parse_partition_by(self) -> t.List[exp.Expression]: 2694 if self._match(TokenType.PARTITION_BY): 2695 return self._parse_csv(self._parse_assignment) 2696 return [] 2697 2698 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2699 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2700 if self._match_text_seq("MINVALUE"): 2701 return exp.var("MINVALUE") 2702 if self._match_text_seq("MAXVALUE"): 2703 return exp.var("MAXVALUE") 2704 return self._parse_bitwise() 2705 2706 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2707 expression = None 2708 from_expressions = None 2709 to_expressions = None 2710 2711 if self._match(TokenType.IN): 2712 this = self._parse_wrapped_csv(self._parse_bitwise) 2713 elif self._match(TokenType.FROM): 2714 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2715 self._match_text_seq("TO") 2716 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2717 elif self._match_text_seq("WITH", "(", "MODULUS"): 2718 this = self._parse_number() 2719 self._match_text_seq(",", "REMAINDER") 2720 expression = self._parse_number() 2721 self._match_r_paren() 2722 else: 2723 self.raise_error("Failed to parse partition bound spec.") 2724 2725 return self.expression( 2726 exp.PartitionBoundSpec, 2727 this=this, 2728 expression=expression, 2729 from_expressions=from_expressions, 2730 to_expressions=to_expressions, 2731 ) 2732 2733 # https://www.postgresql.org/docs/current/sql-createtable.html 2734 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2735 if not self._match_text_seq("OF"): 2736 self._retreat(self._index - 1) 2737 return None 2738 2739 this = self._parse_table(schema=True) 2740 2741 if self._match(TokenType.DEFAULT): 2742 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2743 elif self._match_text_seq("FOR", "VALUES"): 2744 expression = self._parse_partition_bound_spec() 2745 else: 2746 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2747 2748 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2749 2750 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2751 self._match(TokenType.EQ) 2752 return self.expression( 2753 exp.PartitionedByProperty, 2754 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2755 ) 2756 2757 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2758 if self._match_text_seq("AND", "STATISTICS"): 2759 statistics = True 2760 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2761 statistics = False 2762 else: 2763 statistics = None 2764 2765 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2766 2767 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2768 if self._match_text_seq("SQL"): 2769 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2770 return None 2771 2772 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2773 if self._match_text_seq("SQL", "DATA"): 2774 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2775 return None 2776 2777 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2778 if self._match_text_seq("PRIMARY", "INDEX"): 2779 return exp.NoPrimaryIndexProperty() 2780 if self._match_text_seq("SQL"): 2781 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2782 return None 2783 2784 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2785 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2786 return exp.OnCommitProperty() 2787 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2788 return exp.OnCommitProperty(delete=True) 2789 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2790 2791 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2792 if self._match_text_seq("SQL", "DATA"): 2793 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2794 return None 2795 2796 def _parse_distkey(self) -> exp.DistKeyProperty: 2797 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2798 2799 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2800 table = self._parse_table(schema=True) 2801 2802 options = [] 2803 while self._match_texts(("INCLUDING", "EXCLUDING")): 2804 this = self._prev.text.upper() 2805 2806 id_var = self._parse_id_var() 2807 if not id_var: 2808 return None 2809 2810 options.append( 2811 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2812 ) 2813 2814 return self.expression(exp.LikeProperty, this=table, expressions=options) 2815 2816 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2817 return self.expression( 2818 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2819 ) 2820 2821 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2822 self._match(TokenType.EQ) 2823 return self.expression( 2824 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2825 ) 2826 2827 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2828 self._match_text_seq("WITH", "CONNECTION") 2829 return self.expression( 2830 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2831 ) 2832 2833 def _parse_returns(self) -> exp.ReturnsProperty: 2834 value: t.Optional[exp.Expression] 2835 null = None 2836 is_table = self._match(TokenType.TABLE) 2837 2838 if is_table: 2839 if self._match(TokenType.LT): 2840 value = self.expression( 2841 exp.Schema, 2842 this="TABLE", 2843 expressions=self._parse_csv(self._parse_struct_types), 2844 ) 2845 if not self._match(TokenType.GT): 2846 self.raise_error("Expecting >") 2847 else: 2848 value = self._parse_schema(exp.var("TABLE")) 2849 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2850 null = True 2851 value = None 2852 else: 2853 value = self._parse_types() 2854 2855 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2856 2857 def _parse_describe(self) -> exp.Describe: 2858 kind = self._match_set(self.CREATABLES) and self._prev.text 2859 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2860 if self._match(TokenType.DOT): 2861 style = None 2862 self._retreat(self._index - 2) 2863 2864 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2865 2866 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2867 this = self._parse_statement() 2868 else: 2869 this = self._parse_table(schema=True) 2870 2871 properties = self._parse_properties() 2872 expressions = properties.expressions if properties else None 2873 partition = self._parse_partition() 2874 return self.expression( 2875 exp.Describe, 2876 this=this, 2877 style=style, 2878 kind=kind, 2879 expressions=expressions, 2880 partition=partition, 2881 format=format, 2882 ) 2883 2884 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2885 kind = self._prev.text.upper() 2886 expressions = [] 2887 2888 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2889 if self._match(TokenType.WHEN): 2890 expression = self._parse_disjunction() 2891 self._match(TokenType.THEN) 2892 else: 2893 expression = None 2894 2895 else_ = self._match(TokenType.ELSE) 2896 2897 if not self._match(TokenType.INTO): 2898 return None 2899 2900 return self.expression( 2901 exp.ConditionalInsert, 2902 this=self.expression( 2903 exp.Insert, 2904 this=self._parse_table(schema=True), 2905 expression=self._parse_derived_table_values(), 2906 ), 2907 expression=expression, 2908 else_=else_, 2909 ) 2910 2911 expression = parse_conditional_insert() 2912 while expression is not None: 2913 expressions.append(expression) 2914 expression = parse_conditional_insert() 2915 2916 return self.expression( 2917 exp.MultitableInserts, 2918 kind=kind, 2919 comments=comments, 2920 expressions=expressions, 2921 source=self._parse_table(), 2922 ) 2923 2924 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2925 comments = [] 2926 hint = self._parse_hint() 2927 overwrite = self._match(TokenType.OVERWRITE) 2928 ignore = self._match(TokenType.IGNORE) 2929 local = self._match_text_seq("LOCAL") 2930 alternative = None 2931 is_function = None 2932 2933 if self._match_text_seq("DIRECTORY"): 2934 this: t.Optional[exp.Expression] = self.expression( 2935 exp.Directory, 2936 this=self._parse_var_or_string(), 2937 local=local, 2938 row_format=self._parse_row_format(match_row=True), 2939 ) 2940 else: 2941 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2942 comments += ensure_list(self._prev_comments) 2943 return self._parse_multitable_inserts(comments) 2944 2945 if self._match(TokenType.OR): 2946 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2947 2948 self._match(TokenType.INTO) 2949 comments += ensure_list(self._prev_comments) 2950 self._match(TokenType.TABLE) 2951 is_function = self._match(TokenType.FUNCTION) 2952 2953 this = ( 2954 self._parse_table(schema=True, parse_partition=True) 2955 if not is_function 2956 else self._parse_function() 2957 ) 2958 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2959 this.set("alias", self._parse_table_alias()) 2960 2961 returning = self._parse_returning() 2962 2963 return self.expression( 2964 exp.Insert, 2965 comments=comments, 2966 hint=hint, 2967 is_function=is_function, 2968 this=this, 2969 stored=self._match_text_seq("STORED") and self._parse_stored(), 2970 by_name=self._match_text_seq("BY", "NAME"), 2971 exists=self._parse_exists(), 2972 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2973 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2974 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2975 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2976 conflict=self._parse_on_conflict(), 2977 returning=returning or self._parse_returning(), 2978 overwrite=overwrite, 2979 alternative=alternative, 2980 ignore=ignore, 2981 source=self._match(TokenType.TABLE) and self._parse_table(), 2982 ) 2983 2984 def _parse_kill(self) -> exp.Kill: 2985 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2986 2987 return self.expression( 2988 exp.Kill, 2989 this=self._parse_primary(), 2990 kind=kind, 2991 ) 2992 2993 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2994 conflict = self._match_text_seq("ON", "CONFLICT") 2995 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2996 2997 if not conflict and not duplicate: 2998 return None 2999 3000 conflict_keys = None 3001 constraint = None 3002 3003 if conflict: 3004 if self._match_text_seq("ON", "CONSTRAINT"): 3005 constraint = self._parse_id_var() 3006 elif self._match(TokenType.L_PAREN): 3007 conflict_keys = self._parse_csv(self._parse_id_var) 3008 self._match_r_paren() 3009 3010 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 3011 if self._prev.token_type == TokenType.UPDATE: 3012 self._match(TokenType.SET) 3013 expressions = self._parse_csv(self._parse_equality) 3014 else: 3015 expressions = None 3016 3017 return self.expression( 3018 exp.OnConflict, 3019 duplicate=duplicate, 3020 expressions=expressions, 3021 action=action, 3022 conflict_keys=conflict_keys, 3023 constraint=constraint, 3024 where=self._parse_where(), 3025 ) 3026 3027 def _parse_returning(self) -> t.Optional[exp.Returning]: 3028 if not self._match(TokenType.RETURNING): 3029 return None 3030 return self.expression( 3031 exp.Returning, 3032 expressions=self._parse_csv(self._parse_expression), 3033 into=self._match(TokenType.INTO) and self._parse_table_part(), 3034 ) 3035 3036 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3037 if not self._match(TokenType.FORMAT): 3038 return None 3039 return self._parse_row_format() 3040 3041 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 3042 index = self._index 3043 with_ = with_ or self._match_text_seq("WITH") 3044 3045 if not self._match(TokenType.SERDE_PROPERTIES): 3046 self._retreat(index) 3047 return None 3048 return self.expression( 3049 exp.SerdeProperties, 3050 **{ # type: ignore 3051 "expressions": self._parse_wrapped_properties(), 3052 "with": with_, 3053 }, 3054 ) 3055 3056 def _parse_row_format( 3057 self, match_row: bool = False 3058 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3059 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 3060 return None 3061 3062 if self._match_text_seq("SERDE"): 3063 this = self._parse_string() 3064 3065 serde_properties = self._parse_serde_properties() 3066 3067 return self.expression( 3068 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 3069 ) 3070 3071 self._match_text_seq("DELIMITED") 3072 3073 kwargs = {} 3074 3075 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3076 kwargs["fields"] = self._parse_string() 3077 if self._match_text_seq("ESCAPED", "BY"): 3078 kwargs["escaped"] = self._parse_string() 3079 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3080 kwargs["collection_items"] = self._parse_string() 3081 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3082 kwargs["map_keys"] = self._parse_string() 3083 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3084 kwargs["lines"] = self._parse_string() 3085 if self._match_text_seq("NULL", "DEFINED", "AS"): 3086 kwargs["null"] = self._parse_string() 3087 3088 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3089 3090 def _parse_load(self) -> exp.LoadData | exp.Command: 3091 if self._match_text_seq("DATA"): 3092 local = self._match_text_seq("LOCAL") 3093 self._match_text_seq("INPATH") 3094 inpath = self._parse_string() 3095 overwrite = self._match(TokenType.OVERWRITE) 3096 self._match_pair(TokenType.INTO, TokenType.TABLE) 3097 3098 return self.expression( 3099 exp.LoadData, 3100 this=self._parse_table(schema=True), 3101 local=local, 3102 overwrite=overwrite, 3103 inpath=inpath, 3104 partition=self._parse_partition(), 3105 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3106 serde=self._match_text_seq("SERDE") and self._parse_string(), 3107 ) 3108 return self._parse_as_command(self._prev) 3109 3110 def _parse_delete(self) -> exp.Delete: 3111 # This handles MySQL's "Multiple-Table Syntax" 3112 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3113 tables = None 3114 if not self._match(TokenType.FROM, advance=False): 3115 tables = self._parse_csv(self._parse_table) or None 3116 3117 returning = self._parse_returning() 3118 3119 return self.expression( 3120 exp.Delete, 3121 tables=tables, 3122 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3123 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3124 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3125 where=self._parse_where(), 3126 returning=returning or self._parse_returning(), 3127 limit=self._parse_limit(), 3128 ) 3129 3130 def _parse_update(self) -> exp.Update: 3131 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3132 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3133 returning = self._parse_returning() 3134 return self.expression( 3135 exp.Update, 3136 **{ # type: ignore 3137 "this": this, 3138 "expressions": expressions, 3139 "from": self._parse_from(joins=True), 3140 "where": self._parse_where(), 3141 "returning": returning or self._parse_returning(), 3142 "order": self._parse_order(), 3143 "limit": self._parse_limit(), 3144 }, 3145 ) 3146 3147 def _parse_use(self) -> exp.Use: 3148 return self.expression( 3149 exp.Use, 3150 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3151 this=self._parse_table(schema=False), 3152 ) 3153 3154 def _parse_uncache(self) -> exp.Uncache: 3155 if not self._match(TokenType.TABLE): 3156 self.raise_error("Expecting TABLE after UNCACHE") 3157 3158 return self.expression( 3159 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3160 ) 3161 3162 def _parse_cache(self) -> exp.Cache: 3163 lazy = self._match_text_seq("LAZY") 3164 self._match(TokenType.TABLE) 3165 table = self._parse_table(schema=True) 3166 3167 options = [] 3168 if self._match_text_seq("OPTIONS"): 3169 self._match_l_paren() 3170 k = self._parse_string() 3171 self._match(TokenType.EQ) 3172 v = self._parse_string() 3173 options = [k, v] 3174 self._match_r_paren() 3175 3176 self._match(TokenType.ALIAS) 3177 return self.expression( 3178 exp.Cache, 3179 this=table, 3180 lazy=lazy, 3181 options=options, 3182 expression=self._parse_select(nested=True), 3183 ) 3184 3185 def _parse_partition(self) -> t.Optional[exp.Partition]: 3186 if not self._match_texts(self.PARTITION_KEYWORDS): 3187 return None 3188 3189 return self.expression( 3190 exp.Partition, 3191 subpartition=self._prev.text.upper() == "SUBPARTITION", 3192 expressions=self._parse_wrapped_csv(self._parse_assignment), 3193 ) 3194 3195 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3196 def _parse_value_expression() -> t.Optional[exp.Expression]: 3197 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3198 return exp.var(self._prev.text.upper()) 3199 return self._parse_expression() 3200 3201 if self._match(TokenType.L_PAREN): 3202 expressions = self._parse_csv(_parse_value_expression) 3203 self._match_r_paren() 3204 return self.expression(exp.Tuple, expressions=expressions) 3205 3206 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3207 expression = self._parse_expression() 3208 if expression: 3209 return self.expression(exp.Tuple, expressions=[expression]) 3210 return None 3211 3212 def _parse_projections(self) -> t.List[exp.Expression]: 3213 return self._parse_expressions() 3214 3215 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3216 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3217 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3218 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3219 ) 3220 elif self._match(TokenType.FROM): 3221 from_ = self._parse_from(skip_from_token=True) 3222 # Support parentheses for duckdb FROM-first syntax 3223 select = self._parse_select() 3224 if select: 3225 select.set("from", from_) 3226 this = select 3227 else: 3228 this = exp.select("*").from_(t.cast(exp.From, from_)) 3229 else: 3230 this = ( 3231 self._parse_table() 3232 if table 3233 else self._parse_select(nested=True, parse_set_operation=False) 3234 ) 3235 3236 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3237 # in case a modifier (e.g. join) is following 3238 if table and isinstance(this, exp.Values) and this.alias: 3239 alias = this.args["alias"].pop() 3240 this = exp.Table(this=this, alias=alias) 3241 3242 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3243 3244 return this 3245 3246 def _parse_select( 3247 self, 3248 nested: bool = False, 3249 table: bool = False, 3250 parse_subquery_alias: bool = True, 3251 parse_set_operation: bool = True, 3252 ) -> t.Optional[exp.Expression]: 3253 cte = self._parse_with() 3254 3255 if cte: 3256 this = self._parse_statement() 3257 3258 if not this: 3259 self.raise_error("Failed to parse any statement following CTE") 3260 return cte 3261 3262 if "with" in this.arg_types: 3263 this.set("with", cte) 3264 else: 3265 self.raise_error(f"{this.key} does not support CTE") 3266 this = cte 3267 3268 return this 3269 3270 # duckdb supports leading with FROM x 3271 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 3272 3273 if self._match(TokenType.SELECT): 3274 comments = self._prev_comments 3275 3276 hint = self._parse_hint() 3277 3278 if self._next and not self._next.token_type == TokenType.DOT: 3279 all_ = self._match(TokenType.ALL) 3280 distinct = self._match_set(self.DISTINCT_TOKENS) 3281 else: 3282 all_, distinct = None, None 3283 3284 kind = ( 3285 self._match(TokenType.ALIAS) 3286 and self._match_texts(("STRUCT", "VALUE")) 3287 and self._prev.text.upper() 3288 ) 3289 3290 if distinct: 3291 distinct = self.expression( 3292 exp.Distinct, 3293 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3294 ) 3295 3296 if all_ and distinct: 3297 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3298 3299 operation_modifiers = [] 3300 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3301 operation_modifiers.append(exp.var(self._prev.text.upper())) 3302 3303 limit = self._parse_limit(top=True) 3304 projections = self._parse_projections() 3305 3306 this = self.expression( 3307 exp.Select, 3308 kind=kind, 3309 hint=hint, 3310 distinct=distinct, 3311 expressions=projections, 3312 limit=limit, 3313 operation_modifiers=operation_modifiers or None, 3314 ) 3315 this.comments = comments 3316 3317 into = self._parse_into() 3318 if into: 3319 this.set("into", into) 3320 3321 if not from_: 3322 from_ = self._parse_from() 3323 3324 if from_: 3325 this.set("from", from_) 3326 3327 this = self._parse_query_modifiers(this) 3328 elif (table or nested) and self._match(TokenType.L_PAREN): 3329 this = self._parse_wrapped_select(table=table) 3330 3331 # We return early here so that the UNION isn't attached to the subquery by the 3332 # following call to _parse_set_operations, but instead becomes the parent node 3333 self._match_r_paren() 3334 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3335 elif self._match(TokenType.VALUES, advance=False): 3336 this = self._parse_derived_table_values() 3337 elif from_: 3338 this = exp.select("*").from_(from_.this, copy=False) 3339 if self._match(TokenType.PIPE_GT, advance=False): 3340 return self._parse_pipe_syntax_query(this) 3341 elif self._match(TokenType.SUMMARIZE): 3342 table = self._match(TokenType.TABLE) 3343 this = self._parse_select() or self._parse_string() or self._parse_table() 3344 return self.expression(exp.Summarize, this=this, table=table) 3345 elif self._match(TokenType.DESCRIBE): 3346 this = self._parse_describe() 3347 elif self._match_text_seq("STREAM"): 3348 this = self._parse_function() 3349 if this: 3350 this = self.expression(exp.Stream, this=this) 3351 else: 3352 self._retreat(self._index - 1) 3353 else: 3354 this = None 3355 3356 return self._parse_set_operations(this) if parse_set_operation else this 3357 3358 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3359 self._match_text_seq("SEARCH") 3360 3361 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3362 3363 if not kind: 3364 return None 3365 3366 self._match_text_seq("FIRST", "BY") 3367 3368 return self.expression( 3369 exp.RecursiveWithSearch, 3370 kind=kind, 3371 this=self._parse_id_var(), 3372 expression=self._match_text_seq("SET") and self._parse_id_var(), 3373 using=self._match_text_seq("USING") and self._parse_id_var(), 3374 ) 3375 3376 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3377 if not skip_with_token and not self._match(TokenType.WITH): 3378 return None 3379 3380 comments = self._prev_comments 3381 recursive = self._match(TokenType.RECURSIVE) 3382 3383 last_comments = None 3384 expressions = [] 3385 while True: 3386 cte = self._parse_cte() 3387 if isinstance(cte, exp.CTE): 3388 expressions.append(cte) 3389 if last_comments: 3390 cte.add_comments(last_comments) 3391 3392 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3393 break 3394 else: 3395 self._match(TokenType.WITH) 3396 3397 last_comments = self._prev_comments 3398 3399 return self.expression( 3400 exp.With, 3401 comments=comments, 3402 expressions=expressions, 3403 recursive=recursive, 3404 search=self._parse_recursive_with_search(), 3405 ) 3406 3407 def _parse_cte(self) -> t.Optional[exp.CTE]: 3408 index = self._index 3409 3410 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3411 if not alias or not alias.this: 3412 self.raise_error("Expected CTE to have alias") 3413 3414 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3415 self._retreat(index) 3416 return None 3417 3418 comments = self._prev_comments 3419 3420 if self._match_text_seq("NOT", "MATERIALIZED"): 3421 materialized = False 3422 elif self._match_text_seq("MATERIALIZED"): 3423 materialized = True 3424 else: 3425 materialized = None 3426 3427 cte = self.expression( 3428 exp.CTE, 3429 this=self._parse_wrapped(self._parse_statement), 3430 alias=alias, 3431 materialized=materialized, 3432 comments=comments, 3433 ) 3434 3435 if isinstance(cte.this, exp.Values): 3436 cte.set("this", exp.select("*").from_(exp.alias_(cte.this, "_values", table=True))) 3437 3438 return cte 3439 3440 def _parse_table_alias( 3441 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3442 ) -> t.Optional[exp.TableAlias]: 3443 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3444 # so this section tries to parse the clause version and if it fails, it treats the token 3445 # as an identifier (alias) 3446 if self._can_parse_limit_or_offset(): 3447 return None 3448 3449 any_token = self._match(TokenType.ALIAS) 3450 alias = ( 3451 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3452 or self._parse_string_as_identifier() 3453 ) 3454 3455 index = self._index 3456 if self._match(TokenType.L_PAREN): 3457 columns = self._parse_csv(self._parse_function_parameter) 3458 self._match_r_paren() if columns else self._retreat(index) 3459 else: 3460 columns = None 3461 3462 if not alias and not columns: 3463 return None 3464 3465 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3466 3467 # We bubble up comments from the Identifier to the TableAlias 3468 if isinstance(alias, exp.Identifier): 3469 table_alias.add_comments(alias.pop_comments()) 3470 3471 return table_alias 3472 3473 def _parse_subquery( 3474 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3475 ) -> t.Optional[exp.Subquery]: 3476 if not this: 3477 return None 3478 3479 return self.expression( 3480 exp.Subquery, 3481 this=this, 3482 pivots=self._parse_pivots(), 3483 alias=self._parse_table_alias() if parse_alias else None, 3484 sample=self._parse_table_sample(), 3485 ) 3486 3487 def _implicit_unnests_to_explicit(self, this: E) -> E: 3488 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3489 3490 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3491 for i, join in enumerate(this.args.get("joins") or []): 3492 table = join.this 3493 normalized_table = table.copy() 3494 normalized_table.meta["maybe_column"] = True 3495 normalized_table = _norm(normalized_table, dialect=self.dialect) 3496 3497 if isinstance(table, exp.Table) and not join.args.get("on"): 3498 if normalized_table.parts[0].name in refs: 3499 table_as_column = table.to_column() 3500 unnest = exp.Unnest(expressions=[table_as_column]) 3501 3502 # Table.to_column creates a parent Alias node that we want to convert to 3503 # a TableAlias and attach to the Unnest, so it matches the parser's output 3504 if isinstance(table.args.get("alias"), exp.TableAlias): 3505 table_as_column.replace(table_as_column.this) 3506 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3507 3508 table.replace(unnest) 3509 3510 refs.add(normalized_table.alias_or_name) 3511 3512 return this 3513 3514 def _parse_query_modifiers( 3515 self, this: t.Optional[exp.Expression] 3516 ) -> t.Optional[exp.Expression]: 3517 if isinstance(this, self.MODIFIABLES): 3518 for join in self._parse_joins(): 3519 this.append("joins", join) 3520 for lateral in iter(self._parse_lateral, None): 3521 this.append("laterals", lateral) 3522 3523 while True: 3524 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3525 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3526 key, expression = parser(self) 3527 3528 if expression: 3529 this.set(key, expression) 3530 if key == "limit": 3531 offset = expression.args.pop("offset", None) 3532 3533 if offset: 3534 offset = exp.Offset(expression=offset) 3535 this.set("offset", offset) 3536 3537 limit_by_expressions = expression.expressions 3538 expression.set("expressions", None) 3539 offset.set("expressions", limit_by_expressions) 3540 continue 3541 break 3542 3543 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3544 this = self._implicit_unnests_to_explicit(this) 3545 3546 return this 3547 3548 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3549 start = self._curr 3550 while self._curr: 3551 self._advance() 3552 3553 end = self._tokens[self._index - 1] 3554 return exp.Hint(expressions=[self._find_sql(start, end)]) 3555 3556 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3557 return self._parse_function_call() 3558 3559 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3560 start_index = self._index 3561 should_fallback_to_string = False 3562 3563 hints = [] 3564 try: 3565 for hint in iter( 3566 lambda: self._parse_csv( 3567 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3568 ), 3569 [], 3570 ): 3571 hints.extend(hint) 3572 except ParseError: 3573 should_fallback_to_string = True 3574 3575 if should_fallback_to_string or self._curr: 3576 self._retreat(start_index) 3577 return self._parse_hint_fallback_to_string() 3578 3579 return self.expression(exp.Hint, expressions=hints) 3580 3581 def _parse_hint(self) -> t.Optional[exp.Hint]: 3582 if self._match(TokenType.HINT) and self._prev_comments: 3583 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3584 3585 return None 3586 3587 def _parse_into(self) -> t.Optional[exp.Into]: 3588 if not self._match(TokenType.INTO): 3589 return None 3590 3591 temp = self._match(TokenType.TEMPORARY) 3592 unlogged = self._match_text_seq("UNLOGGED") 3593 self._match(TokenType.TABLE) 3594 3595 return self.expression( 3596 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3597 ) 3598 3599 def _parse_from( 3600 self, joins: bool = False, skip_from_token: bool = False 3601 ) -> t.Optional[exp.From]: 3602 if not skip_from_token and not self._match(TokenType.FROM): 3603 return None 3604 3605 return self.expression( 3606 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3607 ) 3608 3609 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3610 return self.expression( 3611 exp.MatchRecognizeMeasure, 3612 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3613 this=self._parse_expression(), 3614 ) 3615 3616 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3617 if not self._match(TokenType.MATCH_RECOGNIZE): 3618 return None 3619 3620 self._match_l_paren() 3621 3622 partition = self._parse_partition_by() 3623 order = self._parse_order() 3624 3625 measures = ( 3626 self._parse_csv(self._parse_match_recognize_measure) 3627 if self._match_text_seq("MEASURES") 3628 else None 3629 ) 3630 3631 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3632 rows = exp.var("ONE ROW PER MATCH") 3633 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3634 text = "ALL ROWS PER MATCH" 3635 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3636 text += " SHOW EMPTY MATCHES" 3637 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3638 text += " OMIT EMPTY MATCHES" 3639 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3640 text += " WITH UNMATCHED ROWS" 3641 rows = exp.var(text) 3642 else: 3643 rows = None 3644 3645 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3646 text = "AFTER MATCH SKIP" 3647 if self._match_text_seq("PAST", "LAST", "ROW"): 3648 text += " PAST LAST ROW" 3649 elif self._match_text_seq("TO", "NEXT", "ROW"): 3650 text += " TO NEXT ROW" 3651 elif self._match_text_seq("TO", "FIRST"): 3652 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3653 elif self._match_text_seq("TO", "LAST"): 3654 text += f" TO LAST {self._advance_any().text}" # type: ignore 3655 after = exp.var(text) 3656 else: 3657 after = None 3658 3659 if self._match_text_seq("PATTERN"): 3660 self._match_l_paren() 3661 3662 if not self._curr: 3663 self.raise_error("Expecting )", self._curr) 3664 3665 paren = 1 3666 start = self._curr 3667 3668 while self._curr and paren > 0: 3669 if self._curr.token_type == TokenType.L_PAREN: 3670 paren += 1 3671 if self._curr.token_type == TokenType.R_PAREN: 3672 paren -= 1 3673 3674 end = self._prev 3675 self._advance() 3676 3677 if paren > 0: 3678 self.raise_error("Expecting )", self._curr) 3679 3680 pattern = exp.var(self._find_sql(start, end)) 3681 else: 3682 pattern = None 3683 3684 define = ( 3685 self._parse_csv(self._parse_name_as_expression) 3686 if self._match_text_seq("DEFINE") 3687 else None 3688 ) 3689 3690 self._match_r_paren() 3691 3692 return self.expression( 3693 exp.MatchRecognize, 3694 partition_by=partition, 3695 order=order, 3696 measures=measures, 3697 rows=rows, 3698 after=after, 3699 pattern=pattern, 3700 define=define, 3701 alias=self._parse_table_alias(), 3702 ) 3703 3704 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3705 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3706 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3707 cross_apply = False 3708 3709 if cross_apply is not None: 3710 this = self._parse_select(table=True) 3711 view = None 3712 outer = None 3713 elif self._match(TokenType.LATERAL): 3714 this = self._parse_select(table=True) 3715 view = self._match(TokenType.VIEW) 3716 outer = self._match(TokenType.OUTER) 3717 else: 3718 return None 3719 3720 if not this: 3721 this = ( 3722 self._parse_unnest() 3723 or self._parse_function() 3724 or self._parse_id_var(any_token=False) 3725 ) 3726 3727 while self._match(TokenType.DOT): 3728 this = exp.Dot( 3729 this=this, 3730 expression=self._parse_function() or self._parse_id_var(any_token=False), 3731 ) 3732 3733 ordinality: t.Optional[bool] = None 3734 3735 if view: 3736 table = self._parse_id_var(any_token=False) 3737 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3738 table_alias: t.Optional[exp.TableAlias] = self.expression( 3739 exp.TableAlias, this=table, columns=columns 3740 ) 3741 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3742 # We move the alias from the lateral's child node to the lateral itself 3743 table_alias = this.args["alias"].pop() 3744 else: 3745 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3746 table_alias = self._parse_table_alias() 3747 3748 return self.expression( 3749 exp.Lateral, 3750 this=this, 3751 view=view, 3752 outer=outer, 3753 alias=table_alias, 3754 cross_apply=cross_apply, 3755 ordinality=ordinality, 3756 ) 3757 3758 def _parse_join_parts( 3759 self, 3760 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3761 return ( 3762 self._match_set(self.JOIN_METHODS) and self._prev, 3763 self._match_set(self.JOIN_SIDES) and self._prev, 3764 self._match_set(self.JOIN_KINDS) and self._prev, 3765 ) 3766 3767 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3768 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3769 this = self._parse_column() 3770 if isinstance(this, exp.Column): 3771 return this.this 3772 return this 3773 3774 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3775 3776 def _parse_join( 3777 self, skip_join_token: bool = False, parse_bracket: bool = False 3778 ) -> t.Optional[exp.Join]: 3779 if self._match(TokenType.COMMA): 3780 table = self._try_parse(self._parse_table) 3781 if table: 3782 return self.expression(exp.Join, this=table) 3783 return None 3784 3785 index = self._index 3786 method, side, kind = self._parse_join_parts() 3787 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3788 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3789 3790 if not skip_join_token and not join: 3791 self._retreat(index) 3792 kind = None 3793 method = None 3794 side = None 3795 3796 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3797 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3798 3799 if not skip_join_token and not join and not outer_apply and not cross_apply: 3800 return None 3801 3802 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3803 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3804 kwargs["expressions"] = self._parse_csv( 3805 lambda: self._parse_table(parse_bracket=parse_bracket) 3806 ) 3807 3808 if method: 3809 kwargs["method"] = method.text 3810 if side: 3811 kwargs["side"] = side.text 3812 if kind: 3813 kwargs["kind"] = kind.text 3814 if hint: 3815 kwargs["hint"] = hint 3816 3817 if self._match(TokenType.MATCH_CONDITION): 3818 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3819 3820 if self._match(TokenType.ON): 3821 kwargs["on"] = self._parse_assignment() 3822 elif self._match(TokenType.USING): 3823 kwargs["using"] = self._parse_using_identifiers() 3824 elif ( 3825 not (outer_apply or cross_apply) 3826 and not isinstance(kwargs["this"], exp.Unnest) 3827 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3828 ): 3829 index = self._index 3830 joins: t.Optional[list] = list(self._parse_joins()) 3831 3832 if joins and self._match(TokenType.ON): 3833 kwargs["on"] = self._parse_assignment() 3834 elif joins and self._match(TokenType.USING): 3835 kwargs["using"] = self._parse_using_identifiers() 3836 else: 3837 joins = None 3838 self._retreat(index) 3839 3840 kwargs["this"].set("joins", joins if joins else None) 3841 3842 kwargs["pivots"] = self._parse_pivots() 3843 3844 comments = [c for token in (method, side, kind) if token for c in token.comments] 3845 return self.expression(exp.Join, comments=comments, **kwargs) 3846 3847 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3848 this = self._parse_assignment() 3849 3850 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3851 return this 3852 3853 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3854 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3855 3856 return this 3857 3858 def _parse_index_params(self) -> exp.IndexParameters: 3859 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3860 3861 if self._match(TokenType.L_PAREN, advance=False): 3862 columns = self._parse_wrapped_csv(self._parse_with_operator) 3863 else: 3864 columns = None 3865 3866 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3867 partition_by = self._parse_partition_by() 3868 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3869 tablespace = ( 3870 self._parse_var(any_token=True) 3871 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3872 else None 3873 ) 3874 where = self._parse_where() 3875 3876 on = self._parse_field() if self._match(TokenType.ON) else None 3877 3878 return self.expression( 3879 exp.IndexParameters, 3880 using=using, 3881 columns=columns, 3882 include=include, 3883 partition_by=partition_by, 3884 where=where, 3885 with_storage=with_storage, 3886 tablespace=tablespace, 3887 on=on, 3888 ) 3889 3890 def _parse_index( 3891 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3892 ) -> t.Optional[exp.Index]: 3893 if index or anonymous: 3894 unique = None 3895 primary = None 3896 amp = None 3897 3898 self._match(TokenType.ON) 3899 self._match(TokenType.TABLE) # hive 3900 table = self._parse_table_parts(schema=True) 3901 else: 3902 unique = self._match(TokenType.UNIQUE) 3903 primary = self._match_text_seq("PRIMARY") 3904 amp = self._match_text_seq("AMP") 3905 3906 if not self._match(TokenType.INDEX): 3907 return None 3908 3909 index = self._parse_id_var() 3910 table = None 3911 3912 params = self._parse_index_params() 3913 3914 return self.expression( 3915 exp.Index, 3916 this=index, 3917 table=table, 3918 unique=unique, 3919 primary=primary, 3920 amp=amp, 3921 params=params, 3922 ) 3923 3924 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3925 hints: t.List[exp.Expression] = [] 3926 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3927 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3928 hints.append( 3929 self.expression( 3930 exp.WithTableHint, 3931 expressions=self._parse_csv( 3932 lambda: self._parse_function() or self._parse_var(any_token=True) 3933 ), 3934 ) 3935 ) 3936 self._match_r_paren() 3937 else: 3938 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3939 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3940 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3941 3942 self._match_set((TokenType.INDEX, TokenType.KEY)) 3943 if self._match(TokenType.FOR): 3944 hint.set("target", self._advance_any() and self._prev.text.upper()) 3945 3946 hint.set("expressions", self._parse_wrapped_id_vars()) 3947 hints.append(hint) 3948 3949 return hints or None 3950 3951 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3952 return ( 3953 (not schema and self._parse_function(optional_parens=False)) 3954 or self._parse_id_var(any_token=False) 3955 or self._parse_string_as_identifier() 3956 or self._parse_placeholder() 3957 ) 3958 3959 def _parse_table_parts( 3960 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3961 ) -> exp.Table: 3962 catalog = None 3963 db = None 3964 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3965 3966 while self._match(TokenType.DOT): 3967 if catalog: 3968 # This allows nesting the table in arbitrarily many dot expressions if needed 3969 table = self.expression( 3970 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3971 ) 3972 else: 3973 catalog = db 3974 db = table 3975 # "" used for tsql FROM a..b case 3976 table = self._parse_table_part(schema=schema) or "" 3977 3978 if ( 3979 wildcard 3980 and self._is_connected() 3981 and (isinstance(table, exp.Identifier) or not table) 3982 and self._match(TokenType.STAR) 3983 ): 3984 if isinstance(table, exp.Identifier): 3985 table.args["this"] += "*" 3986 else: 3987 table = exp.Identifier(this="*") 3988 3989 # We bubble up comments from the Identifier to the Table 3990 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3991 3992 if is_db_reference: 3993 catalog = db 3994 db = table 3995 table = None 3996 3997 if not table and not is_db_reference: 3998 self.raise_error(f"Expected table name but got {self._curr}") 3999 if not db and is_db_reference: 4000 self.raise_error(f"Expected database name but got {self._curr}") 4001 4002 table = self.expression( 4003 exp.Table, 4004 comments=comments, 4005 this=table, 4006 db=db, 4007 catalog=catalog, 4008 ) 4009 4010 changes = self._parse_changes() 4011 if changes: 4012 table.set("changes", changes) 4013 4014 at_before = self._parse_historical_data() 4015 if at_before: 4016 table.set("when", at_before) 4017 4018 pivots = self._parse_pivots() 4019 if pivots: 4020 table.set("pivots", pivots) 4021 4022 return table 4023 4024 def _parse_table( 4025 self, 4026 schema: bool = False, 4027 joins: bool = False, 4028 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 4029 parse_bracket: bool = False, 4030 is_db_reference: bool = False, 4031 parse_partition: bool = False, 4032 ) -> t.Optional[exp.Expression]: 4033 lateral = self._parse_lateral() 4034 if lateral: 4035 return lateral 4036 4037 unnest = self._parse_unnest() 4038 if unnest: 4039 return unnest 4040 4041 values = self._parse_derived_table_values() 4042 if values: 4043 return values 4044 4045 subquery = self._parse_select(table=True) 4046 if subquery: 4047 if not subquery.args.get("pivots"): 4048 subquery.set("pivots", self._parse_pivots()) 4049 return subquery 4050 4051 bracket = parse_bracket and self._parse_bracket(None) 4052 bracket = self.expression(exp.Table, this=bracket) if bracket else None 4053 4054 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 4055 self._parse_table 4056 ) 4057 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 4058 4059 only = self._match(TokenType.ONLY) 4060 4061 this = t.cast( 4062 exp.Expression, 4063 bracket 4064 or rows_from 4065 or self._parse_bracket( 4066 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 4067 ), 4068 ) 4069 4070 if only: 4071 this.set("only", only) 4072 4073 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 4074 self._match_text_seq("*") 4075 4076 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4077 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4078 this.set("partition", self._parse_partition()) 4079 4080 if schema: 4081 return self._parse_schema(this=this) 4082 4083 version = self._parse_version() 4084 4085 if version: 4086 this.set("version", version) 4087 4088 if self.dialect.ALIAS_POST_TABLESAMPLE: 4089 this.set("sample", self._parse_table_sample()) 4090 4091 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4092 if alias: 4093 this.set("alias", alias) 4094 4095 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4096 return self.expression( 4097 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 4098 ) 4099 4100 this.set("hints", self._parse_table_hints()) 4101 4102 if not this.args.get("pivots"): 4103 this.set("pivots", self._parse_pivots()) 4104 4105 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4106 this.set("sample", self._parse_table_sample()) 4107 4108 if joins: 4109 for join in self._parse_joins(): 4110 this.append("joins", join) 4111 4112 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4113 this.set("ordinality", True) 4114 this.set("alias", self._parse_table_alias()) 4115 4116 return this 4117 4118 def _parse_version(self) -> t.Optional[exp.Version]: 4119 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4120 this = "TIMESTAMP" 4121 elif self._match(TokenType.VERSION_SNAPSHOT): 4122 this = "VERSION" 4123 else: 4124 return None 4125 4126 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4127 kind = self._prev.text.upper() 4128 start = self._parse_bitwise() 4129 self._match_texts(("TO", "AND")) 4130 end = self._parse_bitwise() 4131 expression: t.Optional[exp.Expression] = self.expression( 4132 exp.Tuple, expressions=[start, end] 4133 ) 4134 elif self._match_text_seq("CONTAINED", "IN"): 4135 kind = "CONTAINED IN" 4136 expression = self.expression( 4137 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4138 ) 4139 elif self._match(TokenType.ALL): 4140 kind = "ALL" 4141 expression = None 4142 else: 4143 self._match_text_seq("AS", "OF") 4144 kind = "AS OF" 4145 expression = self._parse_type() 4146 4147 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4148 4149 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4150 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4151 index = self._index 4152 historical_data = None 4153 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4154 this = self._prev.text.upper() 4155 kind = ( 4156 self._match(TokenType.L_PAREN) 4157 and self._match_texts(self.HISTORICAL_DATA_KIND) 4158 and self._prev.text.upper() 4159 ) 4160 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4161 4162 if expression: 4163 self._match_r_paren() 4164 historical_data = self.expression( 4165 exp.HistoricalData, this=this, kind=kind, expression=expression 4166 ) 4167 else: 4168 self._retreat(index) 4169 4170 return historical_data 4171 4172 def _parse_changes(self) -> t.Optional[exp.Changes]: 4173 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4174 return None 4175 4176 information = self._parse_var(any_token=True) 4177 self._match_r_paren() 4178 4179 return self.expression( 4180 exp.Changes, 4181 information=information, 4182 at_before=self._parse_historical_data(), 4183 end=self._parse_historical_data(), 4184 ) 4185 4186 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4187 if not self._match(TokenType.UNNEST): 4188 return None 4189 4190 expressions = self._parse_wrapped_csv(self._parse_equality) 4191 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4192 4193 alias = self._parse_table_alias() if with_alias else None 4194 4195 if alias: 4196 if self.dialect.UNNEST_COLUMN_ONLY: 4197 if alias.args.get("columns"): 4198 self.raise_error("Unexpected extra column alias in unnest.") 4199 4200 alias.set("columns", [alias.this]) 4201 alias.set("this", None) 4202 4203 columns = alias.args.get("columns") or [] 4204 if offset and len(expressions) < len(columns): 4205 offset = columns.pop() 4206 4207 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4208 self._match(TokenType.ALIAS) 4209 offset = self._parse_id_var( 4210 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4211 ) or exp.to_identifier("offset") 4212 4213 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4214 4215 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4216 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4217 if not is_derived and not ( 4218 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4219 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4220 ): 4221 return None 4222 4223 expressions = self._parse_csv(self._parse_value) 4224 alias = self._parse_table_alias() 4225 4226 if is_derived: 4227 self._match_r_paren() 4228 4229 return self.expression( 4230 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4231 ) 4232 4233 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4234 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4235 as_modifier and self._match_text_seq("USING", "SAMPLE") 4236 ): 4237 return None 4238 4239 bucket_numerator = None 4240 bucket_denominator = None 4241 bucket_field = None 4242 percent = None 4243 size = None 4244 seed = None 4245 4246 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4247 matched_l_paren = self._match(TokenType.L_PAREN) 4248 4249 if self.TABLESAMPLE_CSV: 4250 num = None 4251 expressions = self._parse_csv(self._parse_primary) 4252 else: 4253 expressions = None 4254 num = ( 4255 self._parse_factor() 4256 if self._match(TokenType.NUMBER, advance=False) 4257 else self._parse_primary() or self._parse_placeholder() 4258 ) 4259 4260 if self._match_text_seq("BUCKET"): 4261 bucket_numerator = self._parse_number() 4262 self._match_text_seq("OUT", "OF") 4263 bucket_denominator = bucket_denominator = self._parse_number() 4264 self._match(TokenType.ON) 4265 bucket_field = self._parse_field() 4266 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4267 percent = num 4268 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4269 size = num 4270 else: 4271 percent = num 4272 4273 if matched_l_paren: 4274 self._match_r_paren() 4275 4276 if self._match(TokenType.L_PAREN): 4277 method = self._parse_var(upper=True) 4278 seed = self._match(TokenType.COMMA) and self._parse_number() 4279 self._match_r_paren() 4280 elif self._match_texts(("SEED", "REPEATABLE")): 4281 seed = self._parse_wrapped(self._parse_number) 4282 4283 if not method and self.DEFAULT_SAMPLING_METHOD: 4284 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4285 4286 return self.expression( 4287 exp.TableSample, 4288 expressions=expressions, 4289 method=method, 4290 bucket_numerator=bucket_numerator, 4291 bucket_denominator=bucket_denominator, 4292 bucket_field=bucket_field, 4293 percent=percent, 4294 size=size, 4295 seed=seed, 4296 ) 4297 4298 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4299 return list(iter(self._parse_pivot, None)) or None 4300 4301 def _parse_joins(self) -> t.Iterator[exp.Join]: 4302 return iter(self._parse_join, None) 4303 4304 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4305 if not self._match(TokenType.INTO): 4306 return None 4307 4308 return self.expression( 4309 exp.UnpivotColumns, 4310 this=self._match_text_seq("NAME") and self._parse_column(), 4311 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4312 ) 4313 4314 # https://duckdb.org/docs/sql/statements/pivot 4315 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4316 def _parse_on() -> t.Optional[exp.Expression]: 4317 this = self._parse_bitwise() 4318 4319 if self._match(TokenType.IN): 4320 # PIVOT ... ON col IN (row_val1, row_val2) 4321 return self._parse_in(this) 4322 if self._match(TokenType.ALIAS, advance=False): 4323 # UNPIVOT ... ON (col1, col2, col3) AS row_val 4324 return self._parse_alias(this) 4325 4326 return this 4327 4328 this = self._parse_table() 4329 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4330 into = self._parse_unpivot_columns() 4331 using = self._match(TokenType.USING) and self._parse_csv( 4332 lambda: self._parse_alias(self._parse_function()) 4333 ) 4334 group = self._parse_group() 4335 4336 return self.expression( 4337 exp.Pivot, 4338 this=this, 4339 expressions=expressions, 4340 using=using, 4341 group=group, 4342 unpivot=is_unpivot, 4343 into=into, 4344 ) 4345 4346 def _parse_pivot_in(self) -> exp.In: 4347 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4348 this = self._parse_select_or_expression() 4349 4350 self._match(TokenType.ALIAS) 4351 alias = self._parse_bitwise() 4352 if alias: 4353 if isinstance(alias, exp.Column) and not alias.db: 4354 alias = alias.this 4355 return self.expression(exp.PivotAlias, this=this, alias=alias) 4356 4357 return this 4358 4359 value = self._parse_column() 4360 4361 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4362 self.raise_error("Expecting IN (") 4363 4364 if self._match(TokenType.ANY): 4365 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4366 else: 4367 exprs = self._parse_csv(_parse_aliased_expression) 4368 4369 self._match_r_paren() 4370 return self.expression(exp.In, this=value, expressions=exprs) 4371 4372 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4373 index = self._index 4374 include_nulls = None 4375 4376 if self._match(TokenType.PIVOT): 4377 unpivot = False 4378 elif self._match(TokenType.UNPIVOT): 4379 unpivot = True 4380 4381 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4382 if self._match_text_seq("INCLUDE", "NULLS"): 4383 include_nulls = True 4384 elif self._match_text_seq("EXCLUDE", "NULLS"): 4385 include_nulls = False 4386 else: 4387 return None 4388 4389 expressions = [] 4390 4391 if not self._match(TokenType.L_PAREN): 4392 self._retreat(index) 4393 return None 4394 4395 if unpivot: 4396 expressions = self._parse_csv(self._parse_column) 4397 else: 4398 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4399 4400 if not expressions: 4401 self.raise_error("Failed to parse PIVOT's aggregation list") 4402 4403 if not self._match(TokenType.FOR): 4404 self.raise_error("Expecting FOR") 4405 4406 fields = [] 4407 while True: 4408 field = self._try_parse(self._parse_pivot_in) 4409 if not field: 4410 break 4411 fields.append(field) 4412 4413 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4414 self._parse_bitwise 4415 ) 4416 4417 group = self._parse_group() 4418 4419 self._match_r_paren() 4420 4421 pivot = self.expression( 4422 exp.Pivot, 4423 expressions=expressions, 4424 fields=fields, 4425 unpivot=unpivot, 4426 include_nulls=include_nulls, 4427 default_on_null=default_on_null, 4428 group=group, 4429 ) 4430 4431 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4432 pivot.set("alias", self._parse_table_alias()) 4433 4434 if not unpivot: 4435 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4436 4437 columns: t.List[exp.Expression] = [] 4438 all_fields = [] 4439 for pivot_field in pivot.fields: 4440 pivot_field_expressions = pivot_field.expressions 4441 4442 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 4443 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4444 continue 4445 4446 all_fields.append( 4447 [ 4448 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4449 for fld in pivot_field_expressions 4450 ] 4451 ) 4452 4453 if all_fields: 4454 if names: 4455 all_fields.append(names) 4456 4457 # Generate all possible combinations of the pivot columns 4458 # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4459 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4460 for fld_parts_tuple in itertools.product(*all_fields): 4461 fld_parts = list(fld_parts_tuple) 4462 4463 if names and self.PREFIXED_PIVOT_COLUMNS: 4464 # Move the "name" to the front of the list 4465 fld_parts.insert(0, fld_parts.pop(-1)) 4466 4467 columns.append(exp.to_identifier("_".join(fld_parts))) 4468 4469 pivot.set("columns", columns) 4470 4471 return pivot 4472 4473 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4474 return [agg.alias for agg in aggregations if agg.alias] 4475 4476 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4477 if not skip_where_token and not self._match(TokenType.PREWHERE): 4478 return None 4479 4480 return self.expression( 4481 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4482 ) 4483 4484 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4485 if not skip_where_token and not self._match(TokenType.WHERE): 4486 return None 4487 4488 return self.expression( 4489 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4490 ) 4491 4492 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4493 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4494 return None 4495 4496 elements: t.Dict[str, t.Any] = defaultdict(list) 4497 4498 if self._match(TokenType.ALL): 4499 elements["all"] = True 4500 elif self._match(TokenType.DISTINCT): 4501 elements["all"] = False 4502 4503 while True: 4504 index = self._index 4505 4506 elements["expressions"].extend( 4507 self._parse_csv( 4508 lambda: None 4509 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4510 else self._parse_assignment() 4511 ) 4512 ) 4513 4514 before_with_index = self._index 4515 with_prefix = self._match(TokenType.WITH) 4516 4517 if self._match(TokenType.ROLLUP): 4518 elements["rollup"].append( 4519 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4520 ) 4521 elif self._match(TokenType.CUBE): 4522 elements["cube"].append( 4523 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4524 ) 4525 elif self._match(TokenType.GROUPING_SETS): 4526 elements["grouping_sets"].append( 4527 self.expression( 4528 exp.GroupingSets, 4529 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4530 ) 4531 ) 4532 elif self._match_text_seq("TOTALS"): 4533 elements["totals"] = True # type: ignore 4534 4535 if before_with_index <= self._index <= before_with_index + 1: 4536 self._retreat(before_with_index) 4537 break 4538 4539 if index == self._index: 4540 break 4541 4542 return self.expression(exp.Group, **elements) # type: ignore 4543 4544 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4545 return self.expression( 4546 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4547 ) 4548 4549 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4550 if self._match(TokenType.L_PAREN): 4551 grouping_set = self._parse_csv(self._parse_column) 4552 self._match_r_paren() 4553 return self.expression(exp.Tuple, expressions=grouping_set) 4554 4555 return self._parse_column() 4556 4557 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4558 if not skip_having_token and not self._match(TokenType.HAVING): 4559 return None 4560 return self.expression(exp.Having, this=self._parse_assignment()) 4561 4562 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4563 if not self._match(TokenType.QUALIFY): 4564 return None 4565 return self.expression(exp.Qualify, this=self._parse_assignment()) 4566 4567 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4568 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4569 exp.Prior, this=self._parse_bitwise() 4570 ) 4571 connect = self._parse_assignment() 4572 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4573 return connect 4574 4575 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4576 if skip_start_token: 4577 start = None 4578 elif self._match(TokenType.START_WITH): 4579 start = self._parse_assignment() 4580 else: 4581 return None 4582 4583 self._match(TokenType.CONNECT_BY) 4584 nocycle = self._match_text_seq("NOCYCLE") 4585 connect = self._parse_connect_with_prior() 4586 4587 if not start and self._match(TokenType.START_WITH): 4588 start = self._parse_assignment() 4589 4590 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4591 4592 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4593 this = self._parse_id_var(any_token=True) 4594 if self._match(TokenType.ALIAS): 4595 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4596 return this 4597 4598 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4599 if self._match_text_seq("INTERPOLATE"): 4600 return self._parse_wrapped_csv(self._parse_name_as_expression) 4601 return None 4602 4603 def _parse_order( 4604 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4605 ) -> t.Optional[exp.Expression]: 4606 siblings = None 4607 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4608 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4609 return this 4610 4611 siblings = True 4612 4613 return self.expression( 4614 exp.Order, 4615 this=this, 4616 expressions=self._parse_csv(self._parse_ordered), 4617 siblings=siblings, 4618 ) 4619 4620 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4621 if not self._match(token): 4622 return None 4623 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4624 4625 def _parse_ordered( 4626 self, parse_method: t.Optional[t.Callable] = None 4627 ) -> t.Optional[exp.Ordered]: 4628 this = parse_method() if parse_method else self._parse_assignment() 4629 if not this: 4630 return None 4631 4632 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4633 this = exp.var("ALL") 4634 4635 asc = self._match(TokenType.ASC) 4636 desc = self._match(TokenType.DESC) or (asc and False) 4637 4638 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4639 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4640 4641 nulls_first = is_nulls_first or False 4642 explicitly_null_ordered = is_nulls_first or is_nulls_last 4643 4644 if ( 4645 not explicitly_null_ordered 4646 and ( 4647 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4648 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4649 ) 4650 and self.dialect.NULL_ORDERING != "nulls_are_last" 4651 ): 4652 nulls_first = True 4653 4654 if self._match_text_seq("WITH", "FILL"): 4655 with_fill = self.expression( 4656 exp.WithFill, 4657 **{ # type: ignore 4658 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4659 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4660 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4661 "interpolate": self._parse_interpolate(), 4662 }, 4663 ) 4664 else: 4665 with_fill = None 4666 4667 return self.expression( 4668 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4669 ) 4670 4671 def _parse_limit_options(self) -> exp.LimitOptions: 4672 percent = self._match(TokenType.PERCENT) 4673 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4674 self._match_text_seq("ONLY") 4675 with_ties = self._match_text_seq("WITH", "TIES") 4676 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4677 4678 def _parse_limit( 4679 self, 4680 this: t.Optional[exp.Expression] = None, 4681 top: bool = False, 4682 skip_limit_token: bool = False, 4683 ) -> t.Optional[exp.Expression]: 4684 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4685 comments = self._prev_comments 4686 if top: 4687 limit_paren = self._match(TokenType.L_PAREN) 4688 expression = self._parse_term() if limit_paren else self._parse_number() 4689 4690 if limit_paren: 4691 self._match_r_paren() 4692 4693 limit_options = self._parse_limit_options() 4694 else: 4695 limit_options = None 4696 expression = self._parse_term() 4697 4698 if self._match(TokenType.COMMA): 4699 offset = expression 4700 expression = self._parse_term() 4701 else: 4702 offset = None 4703 4704 limit_exp = self.expression( 4705 exp.Limit, 4706 this=this, 4707 expression=expression, 4708 offset=offset, 4709 comments=comments, 4710 limit_options=limit_options, 4711 expressions=self._parse_limit_by(), 4712 ) 4713 4714 return limit_exp 4715 4716 if self._match(TokenType.FETCH): 4717 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4718 direction = self._prev.text.upper() if direction else "FIRST" 4719 4720 count = self._parse_field(tokens=self.FETCH_TOKENS) 4721 4722 return self.expression( 4723 exp.Fetch, 4724 direction=direction, 4725 count=count, 4726 limit_options=self._parse_limit_options(), 4727 ) 4728 4729 return this 4730 4731 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4732 if not self._match(TokenType.OFFSET): 4733 return this 4734 4735 count = self._parse_term() 4736 self._match_set((TokenType.ROW, TokenType.ROWS)) 4737 4738 return self.expression( 4739 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4740 ) 4741 4742 def _can_parse_limit_or_offset(self) -> bool: 4743 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4744 return False 4745 4746 index = self._index 4747 result = bool( 4748 self._try_parse(self._parse_limit, retreat=True) 4749 or self._try_parse(self._parse_offset, retreat=True) 4750 ) 4751 self._retreat(index) 4752 return result 4753 4754 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4755 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4756 4757 def _parse_locks(self) -> t.List[exp.Lock]: 4758 locks = [] 4759 while True: 4760 if self._match_text_seq("FOR", "UPDATE"): 4761 update = True 4762 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4763 "LOCK", "IN", "SHARE", "MODE" 4764 ): 4765 update = False 4766 else: 4767 break 4768 4769 expressions = None 4770 if self._match_text_seq("OF"): 4771 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4772 4773 wait: t.Optional[bool | exp.Expression] = None 4774 if self._match_text_seq("NOWAIT"): 4775 wait = True 4776 elif self._match_text_seq("WAIT"): 4777 wait = self._parse_primary() 4778 elif self._match_text_seq("SKIP", "LOCKED"): 4779 wait = False 4780 4781 locks.append( 4782 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4783 ) 4784 4785 return locks 4786 4787 def parse_set_operation(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4788 start = self._index 4789 _, side_token, kind_token = self._parse_join_parts() 4790 4791 side = side_token.text if side_token else None 4792 kind = kind_token.text if kind_token else None 4793 4794 if not self._match_set(self.SET_OPERATIONS): 4795 self._retreat(start) 4796 return None 4797 4798 token_type = self._prev.token_type 4799 4800 if token_type == TokenType.UNION: 4801 operation: t.Type[exp.SetOperation] = exp.Union 4802 elif token_type == TokenType.EXCEPT: 4803 operation = exp.Except 4804 else: 4805 operation = exp.Intersect 4806 4807 comments = self._prev.comments 4808 4809 if self._match(TokenType.DISTINCT): 4810 distinct: t.Optional[bool] = True 4811 elif self._match(TokenType.ALL): 4812 distinct = False 4813 else: 4814 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4815 if distinct is None: 4816 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4817 4818 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4819 "STRICT", "CORRESPONDING" 4820 ) 4821 if self._match_text_seq("CORRESPONDING"): 4822 by_name = True 4823 if not side and not kind: 4824 kind = "INNER" 4825 4826 on_column_list = None 4827 if by_name and self._match_texts(("ON", "BY")): 4828 on_column_list = self._parse_wrapped_csv(self._parse_column) 4829 4830 expression = self._parse_select(nested=True, parse_set_operation=False) 4831 4832 return self.expression( 4833 operation, 4834 comments=comments, 4835 this=this, 4836 distinct=distinct, 4837 by_name=by_name, 4838 expression=expression, 4839 side=side, 4840 kind=kind, 4841 on=on_column_list, 4842 ) 4843 4844 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4845 while this: 4846 setop = self.parse_set_operation(this) 4847 if not setop: 4848 break 4849 this = setop 4850 4851 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4852 expression = this.expression 4853 4854 if expression: 4855 for arg in self.SET_OP_MODIFIERS: 4856 expr = expression.args.get(arg) 4857 if expr: 4858 this.set(arg, expr.pop()) 4859 4860 return this 4861 4862 def _parse_expression(self) -> t.Optional[exp.Expression]: 4863 return self._parse_alias(self._parse_assignment()) 4864 4865 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4866 this = self._parse_disjunction() 4867 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4868 # This allows us to parse <non-identifier token> := <expr> 4869 this = exp.column( 4870 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4871 ) 4872 4873 while self._match_set(self.ASSIGNMENT): 4874 if isinstance(this, exp.Column) and len(this.parts) == 1: 4875 this = this.this 4876 4877 this = self.expression( 4878 self.ASSIGNMENT[self._prev.token_type], 4879 this=this, 4880 comments=self._prev_comments, 4881 expression=self._parse_assignment(), 4882 ) 4883 4884 return this 4885 4886 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4887 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4888 4889 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4890 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4891 4892 def _parse_equality(self) -> t.Optional[exp.Expression]: 4893 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4894 4895 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4896 return self._parse_tokens(self._parse_range, self.COMPARISON) 4897 4898 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4899 this = this or self._parse_bitwise() 4900 negate = self._match(TokenType.NOT) 4901 4902 if self._match_set(self.RANGE_PARSERS): 4903 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4904 if not expression: 4905 return this 4906 4907 this = expression 4908 elif self._match(TokenType.ISNULL): 4909 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4910 4911 # Postgres supports ISNULL and NOTNULL for conditions. 4912 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4913 if self._match(TokenType.NOTNULL): 4914 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4915 this = self.expression(exp.Not, this=this) 4916 4917 if negate: 4918 this = self._negate_range(this) 4919 4920 if self._match(TokenType.IS): 4921 this = self._parse_is(this) 4922 4923 return this 4924 4925 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4926 if not this: 4927 return this 4928 4929 return self.expression(exp.Not, this=this) 4930 4931 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4932 index = self._index - 1 4933 negate = self._match(TokenType.NOT) 4934 4935 if self._match_text_seq("DISTINCT", "FROM"): 4936 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4937 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4938 4939 if self._match(TokenType.JSON): 4940 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4941 4942 if self._match_text_seq("WITH"): 4943 _with = True 4944 elif self._match_text_seq("WITHOUT"): 4945 _with = False 4946 else: 4947 _with = None 4948 4949 unique = self._match(TokenType.UNIQUE) 4950 self._match_text_seq("KEYS") 4951 expression: t.Optional[exp.Expression] = self.expression( 4952 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4953 ) 4954 else: 4955 expression = self._parse_primary() or self._parse_null() 4956 if not expression: 4957 self._retreat(index) 4958 return None 4959 4960 this = self.expression(exp.Is, this=this, expression=expression) 4961 return self.expression(exp.Not, this=this) if negate else this 4962 4963 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4964 unnest = self._parse_unnest(with_alias=False) 4965 if unnest: 4966 this = self.expression(exp.In, this=this, unnest=unnest) 4967 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4968 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4969 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4970 4971 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4972 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4973 else: 4974 this = self.expression(exp.In, this=this, expressions=expressions) 4975 4976 if matched_l_paren: 4977 self._match_r_paren(this) 4978 elif not self._match(TokenType.R_BRACKET, expression=this): 4979 self.raise_error("Expecting ]") 4980 else: 4981 this = self.expression(exp.In, this=this, field=self._parse_column()) 4982 4983 return this 4984 4985 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4986 low = self._parse_bitwise() 4987 self._match(TokenType.AND) 4988 high = self._parse_bitwise() 4989 return self.expression(exp.Between, this=this, low=low, high=high) 4990 4991 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4992 if not self._match(TokenType.ESCAPE): 4993 return this 4994 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4995 4996 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4997 index = self._index 4998 4999 if not self._match(TokenType.INTERVAL) and match_interval: 5000 return None 5001 5002 if self._match(TokenType.STRING, advance=False): 5003 this = self._parse_primary() 5004 else: 5005 this = self._parse_term() 5006 5007 if not this or ( 5008 isinstance(this, exp.Column) 5009 and not this.table 5010 and not this.this.quoted 5011 and this.name.upper() == "IS" 5012 ): 5013 self._retreat(index) 5014 return None 5015 5016 unit = self._parse_function() or ( 5017 not self._match(TokenType.ALIAS, advance=False) 5018 and self._parse_var(any_token=True, upper=True) 5019 ) 5020 5021 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 5022 # each INTERVAL expression into this canonical form so it's easy to transpile 5023 if this and this.is_number: 5024 this = exp.Literal.string(this.to_py()) 5025 elif this and this.is_string: 5026 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5027 if parts and unit: 5028 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5029 unit = None 5030 self._retreat(self._index - 1) 5031 5032 if len(parts) == 1: 5033 this = exp.Literal.string(parts[0][0]) 5034 unit = self.expression(exp.Var, this=parts[0][1].upper()) 5035 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5036 unit = self.expression( 5037 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 5038 ) 5039 5040 interval = self.expression(exp.Interval, this=this, unit=unit) 5041 5042 index = self._index 5043 self._match(TokenType.PLUS) 5044 5045 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 5046 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 5047 return self.expression( 5048 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 5049 ) 5050 5051 self._retreat(index) 5052 return interval 5053 5054 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 5055 this = self._parse_term() 5056 5057 while True: 5058 if self._match_set(self.BITWISE): 5059 this = self.expression( 5060 self.BITWISE[self._prev.token_type], 5061 this=this, 5062 expression=self._parse_term(), 5063 ) 5064 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 5065 this = self.expression( 5066 exp.DPipe, 5067 this=this, 5068 expression=self._parse_term(), 5069 safe=not self.dialect.STRICT_STRING_CONCAT, 5070 ) 5071 elif self._match(TokenType.DQMARK): 5072 this = self.expression( 5073 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 5074 ) 5075 elif self._match_pair(TokenType.LT, TokenType.LT): 5076 this = self.expression( 5077 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 5078 ) 5079 elif self._match_pair(TokenType.GT, TokenType.GT): 5080 this = self.expression( 5081 exp.BitwiseRightShift, this=this, expression=self._parse_term() 5082 ) 5083 else: 5084 break 5085 5086 return this 5087 5088 def _parse_term(self) -> t.Optional[exp.Expression]: 5089 this = self._parse_factor() 5090 5091 while self._match_set(self.TERM): 5092 klass = self.TERM[self._prev.token_type] 5093 comments = self._prev_comments 5094 expression = self._parse_factor() 5095 5096 this = self.expression(klass, this=this, comments=comments, expression=expression) 5097 5098 if isinstance(this, exp.Collate): 5099 expr = this.expression 5100 5101 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5102 # fallback to Identifier / Var 5103 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5104 ident = expr.this 5105 if isinstance(ident, exp.Identifier): 5106 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5107 5108 return this 5109 5110 def _parse_factor(self) -> t.Optional[exp.Expression]: 5111 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5112 this = parse_method() 5113 5114 while self._match_set(self.FACTOR): 5115 klass = self.FACTOR[self._prev.token_type] 5116 comments = self._prev_comments 5117 expression = parse_method() 5118 5119 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5120 self._retreat(self._index - 1) 5121 return this 5122 5123 this = self.expression(klass, this=this, comments=comments, expression=expression) 5124 5125 if isinstance(this, exp.Div): 5126 this.args["typed"] = self.dialect.TYPED_DIVISION 5127 this.args["safe"] = self.dialect.SAFE_DIVISION 5128 5129 return this 5130 5131 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5132 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5133 5134 def _parse_unary(self) -> t.Optional[exp.Expression]: 5135 if self._match_set(self.UNARY_PARSERS): 5136 return self.UNARY_PARSERS[self._prev.token_type](self) 5137 return self._parse_at_time_zone(self._parse_type()) 5138 5139 def _parse_type( 5140 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5141 ) -> t.Optional[exp.Expression]: 5142 interval = parse_interval and self._parse_interval() 5143 if interval: 5144 return interval 5145 5146 index = self._index 5147 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5148 5149 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 5150 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5151 if isinstance(data_type, exp.Cast): 5152 # This constructor can contain ops directly after it, for instance struct unnesting: 5153 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).* 5154 return self._parse_column_ops(data_type) 5155 5156 if data_type: 5157 index2 = self._index 5158 this = self._parse_primary() 5159 5160 if isinstance(this, exp.Literal): 5161 this = self._parse_column_ops(this) 5162 5163 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5164 if parser: 5165 return parser(self, this, data_type) 5166 5167 return self.expression(exp.Cast, this=this, to=data_type) 5168 5169 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5170 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5171 # 5172 # If the index difference here is greater than 1, that means the parser itself must have 5173 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5174 # 5175 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5176 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5177 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5178 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 5179 # 5180 # In these cases, we don't really want to return the converted type, but instead retreat 5181 # and try to parse a Column or Identifier in the section below. 5182 if data_type.expressions and index2 - index > 1: 5183 self._retreat(index2) 5184 return self._parse_column_ops(data_type) 5185 5186 self._retreat(index) 5187 5188 if fallback_to_identifier: 5189 return self._parse_id_var() 5190 5191 this = self._parse_column() 5192 return this and self._parse_column_ops(this) 5193 5194 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5195 this = self._parse_type() 5196 if not this: 5197 return None 5198 5199 if isinstance(this, exp.Column) and not this.table: 5200 this = exp.var(this.name.upper()) 5201 5202 return self.expression( 5203 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5204 ) 5205 5206 def _parse_types( 5207 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5208 ) -> t.Optional[exp.Expression]: 5209 index = self._index 5210 5211 this: t.Optional[exp.Expression] = None 5212 prefix = self._match_text_seq("SYSUDTLIB", ".") 5213 5214 if not self._match_set(self.TYPE_TOKENS): 5215 identifier = allow_identifiers and self._parse_id_var( 5216 any_token=False, tokens=(TokenType.VAR,) 5217 ) 5218 if isinstance(identifier, exp.Identifier): 5219 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 5220 5221 if len(tokens) != 1: 5222 self.raise_error("Unexpected identifier", self._prev) 5223 5224 if tokens[0].token_type in self.TYPE_TOKENS: 5225 self._prev = tokens[0] 5226 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5227 type_name = identifier.name 5228 5229 while self._match(TokenType.DOT): 5230 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5231 5232 this = exp.DataType.build(type_name, udt=True) 5233 else: 5234 self._retreat(self._index - 1) 5235 return None 5236 else: 5237 return None 5238 5239 type_token = self._prev.token_type 5240 5241 if type_token == TokenType.PSEUDO_TYPE: 5242 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5243 5244 if type_token == TokenType.OBJECT_IDENTIFIER: 5245 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5246 5247 # https://materialize.com/docs/sql/types/map/ 5248 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5249 key_type = self._parse_types( 5250 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5251 ) 5252 if not self._match(TokenType.FARROW): 5253 self._retreat(index) 5254 return None 5255 5256 value_type = self._parse_types( 5257 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5258 ) 5259 if not self._match(TokenType.R_BRACKET): 5260 self._retreat(index) 5261 return None 5262 5263 return exp.DataType( 5264 this=exp.DataType.Type.MAP, 5265 expressions=[key_type, value_type], 5266 nested=True, 5267 prefix=prefix, 5268 ) 5269 5270 nested = type_token in self.NESTED_TYPE_TOKENS 5271 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5272 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5273 expressions = None 5274 maybe_func = False 5275 5276 if self._match(TokenType.L_PAREN): 5277 if is_struct: 5278 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5279 elif nested: 5280 expressions = self._parse_csv( 5281 lambda: self._parse_types( 5282 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5283 ) 5284 ) 5285 if type_token == TokenType.NULLABLE and len(expressions) == 1: 5286 this = expressions[0] 5287 this.set("nullable", True) 5288 self._match_r_paren() 5289 return this 5290 elif type_token in self.ENUM_TYPE_TOKENS: 5291 expressions = self._parse_csv(self._parse_equality) 5292 elif is_aggregate: 5293 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5294 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5295 ) 5296 if not func_or_ident: 5297 return None 5298 expressions = [func_or_ident] 5299 if self._match(TokenType.COMMA): 5300 expressions.extend( 5301 self._parse_csv( 5302 lambda: self._parse_types( 5303 check_func=check_func, 5304 schema=schema, 5305 allow_identifiers=allow_identifiers, 5306 ) 5307 ) 5308 ) 5309 else: 5310 expressions = self._parse_csv(self._parse_type_size) 5311 5312 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5313 if type_token == TokenType.VECTOR and len(expressions) == 2: 5314 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5315 5316 if not expressions or not self._match(TokenType.R_PAREN): 5317 self._retreat(index) 5318 return None 5319 5320 maybe_func = True 5321 5322 values: t.Optional[t.List[exp.Expression]] = None 5323 5324 if nested and self._match(TokenType.LT): 5325 if is_struct: 5326 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5327 else: 5328 expressions = self._parse_csv( 5329 lambda: self._parse_types( 5330 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5331 ) 5332 ) 5333 5334 if not self._match(TokenType.GT): 5335 self.raise_error("Expecting >") 5336 5337 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5338 values = self._parse_csv(self._parse_assignment) 5339 if not values and is_struct: 5340 values = None 5341 self._retreat(self._index - 1) 5342 else: 5343 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5344 5345 if type_token in self.TIMESTAMPS: 5346 if self._match_text_seq("WITH", "TIME", "ZONE"): 5347 maybe_func = False 5348 tz_type = ( 5349 exp.DataType.Type.TIMETZ 5350 if type_token in self.TIMES 5351 else exp.DataType.Type.TIMESTAMPTZ 5352 ) 5353 this = exp.DataType(this=tz_type, expressions=expressions) 5354 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5355 maybe_func = False 5356 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5357 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5358 maybe_func = False 5359 elif type_token == TokenType.INTERVAL: 5360 unit = self._parse_var(upper=True) 5361 if unit: 5362 if self._match_text_seq("TO"): 5363 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5364 5365 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5366 else: 5367 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5368 elif type_token == TokenType.VOID: 5369 this = exp.DataType(this=exp.DataType.Type.NULL) 5370 5371 if maybe_func and check_func: 5372 index2 = self._index 5373 peek = self._parse_string() 5374 5375 if not peek: 5376 self._retreat(index) 5377 return None 5378 5379 self._retreat(index2) 5380 5381 if not this: 5382 if self._match_text_seq("UNSIGNED"): 5383 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5384 if not unsigned_type_token: 5385 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5386 5387 type_token = unsigned_type_token or type_token 5388 5389 this = exp.DataType( 5390 this=exp.DataType.Type[type_token.value], 5391 expressions=expressions, 5392 nested=nested, 5393 prefix=prefix, 5394 ) 5395 5396 # Empty arrays/structs are allowed 5397 if values is not None: 5398 cls = exp.Struct if is_struct else exp.Array 5399 this = exp.cast(cls(expressions=values), this, copy=False) 5400 5401 elif expressions: 5402 this.set("expressions", expressions) 5403 5404 # https://materialize.com/docs/sql/types/list/#type-name 5405 while self._match(TokenType.LIST): 5406 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5407 5408 index = self._index 5409 5410 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5411 matched_array = self._match(TokenType.ARRAY) 5412 5413 while self._curr: 5414 datatype_token = self._prev.token_type 5415 matched_l_bracket = self._match(TokenType.L_BRACKET) 5416 5417 if (not matched_l_bracket and not matched_array) or ( 5418 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5419 ): 5420 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5421 # not to be confused with the fixed size array parsing 5422 break 5423 5424 matched_array = False 5425 values = self._parse_csv(self._parse_assignment) or None 5426 if ( 5427 values 5428 and not schema 5429 and ( 5430 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5431 ) 5432 ): 5433 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5434 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5435 self._retreat(index) 5436 break 5437 5438 this = exp.DataType( 5439 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5440 ) 5441 self._match(TokenType.R_BRACKET) 5442 5443 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5444 converter = self.TYPE_CONVERTERS.get(this.this) 5445 if converter: 5446 this = converter(t.cast(exp.DataType, this)) 5447 5448 return this 5449 5450 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5451 index = self._index 5452 5453 if ( 5454 self._curr 5455 and self._next 5456 and self._curr.token_type in self.TYPE_TOKENS 5457 and self._next.token_type in self.TYPE_TOKENS 5458 ): 5459 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5460 # type token. Without this, the list will be parsed as a type and we'll eventually crash 5461 this = self._parse_id_var() 5462 else: 5463 this = ( 5464 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5465 or self._parse_id_var() 5466 ) 5467 5468 self._match(TokenType.COLON) 5469 5470 if ( 5471 type_required 5472 and not isinstance(this, exp.DataType) 5473 and not self._match_set(self.TYPE_TOKENS, advance=False) 5474 ): 5475 self._retreat(index) 5476 return self._parse_types() 5477 5478 return self._parse_column_def(this) 5479 5480 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5481 if not self._match_text_seq("AT", "TIME", "ZONE"): 5482 return this 5483 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5484 5485 def _parse_column(self) -> t.Optional[exp.Expression]: 5486 this = self._parse_column_reference() 5487 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5488 5489 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5490 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5491 5492 return column 5493 5494 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5495 this = self._parse_field() 5496 if ( 5497 not this 5498 and self._match(TokenType.VALUES, advance=False) 5499 and self.VALUES_FOLLOWED_BY_PAREN 5500 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5501 ): 5502 this = self._parse_id_var() 5503 5504 if isinstance(this, exp.Identifier): 5505 # We bubble up comments from the Identifier to the Column 5506 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5507 5508 return this 5509 5510 def _parse_colon_as_variant_extract( 5511 self, this: t.Optional[exp.Expression] 5512 ) -> t.Optional[exp.Expression]: 5513 casts = [] 5514 json_path = [] 5515 escape = None 5516 5517 while self._match(TokenType.COLON): 5518 start_index = self._index 5519 5520 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5521 path = self._parse_column_ops( 5522 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5523 ) 5524 5525 # The cast :: operator has a lower precedence than the extraction operator :, so 5526 # we rearrange the AST appropriately to avoid casting the JSON path 5527 while isinstance(path, exp.Cast): 5528 casts.append(path.to) 5529 path = path.this 5530 5531 if casts: 5532 dcolon_offset = next( 5533 i 5534 for i, t in enumerate(self._tokens[start_index:]) 5535 if t.token_type == TokenType.DCOLON 5536 ) 5537 end_token = self._tokens[start_index + dcolon_offset - 1] 5538 else: 5539 end_token = self._prev 5540 5541 if path: 5542 # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as 5543 # it'll roundtrip to a string literal in GET_PATH 5544 if isinstance(path, exp.Identifier) and path.quoted: 5545 escape = True 5546 5547 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5548 5549 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5550 # Databricks transforms it back to the colon/dot notation 5551 if json_path: 5552 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5553 5554 if json_path_expr: 5555 json_path_expr.set("escape", escape) 5556 5557 this = self.expression( 5558 exp.JSONExtract, 5559 this=this, 5560 expression=json_path_expr, 5561 variant_extract=True, 5562 ) 5563 5564 while casts: 5565 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5566 5567 return this 5568 5569 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5570 return self._parse_types() 5571 5572 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5573 this = self._parse_bracket(this) 5574 5575 while self._match_set(self.COLUMN_OPERATORS): 5576 op_token = self._prev.token_type 5577 op = self.COLUMN_OPERATORS.get(op_token) 5578 5579 if op_token in (TokenType.DCOLON, TokenType.DOTCOLON): 5580 field = self._parse_dcolon() 5581 if not field: 5582 self.raise_error("Expected type") 5583 elif op and self._curr: 5584 field = self._parse_column_reference() or self._parse_bracket() 5585 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5586 field = self._parse_column_ops(field) 5587 else: 5588 field = self._parse_field(any_token=True, anonymous_func=True) 5589 5590 if isinstance(field, (exp.Func, exp.Window)) and this: 5591 # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) etc 5592 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5593 this = exp.replace_tree( 5594 this, 5595 lambda n: ( 5596 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5597 if n.table 5598 else n.this 5599 ) 5600 if isinstance(n, exp.Column) 5601 else n, 5602 ) 5603 5604 if op: 5605 this = op(self, this, field) 5606 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5607 this = self.expression( 5608 exp.Column, 5609 comments=this.comments, 5610 this=field, 5611 table=this.this, 5612 db=this.args.get("table"), 5613 catalog=this.args.get("db"), 5614 ) 5615 elif isinstance(field, exp.Window): 5616 # Move the exp.Dot's to the window's function 5617 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5618 field.set("this", window_func) 5619 this = field 5620 else: 5621 this = self.expression(exp.Dot, this=this, expression=field) 5622 5623 if field and field.comments: 5624 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5625 5626 this = self._parse_bracket(this) 5627 5628 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5629 5630 def _parse_primary(self) -> t.Optional[exp.Expression]: 5631 if self._match_set(self.PRIMARY_PARSERS): 5632 token_type = self._prev.token_type 5633 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5634 5635 if token_type == TokenType.STRING: 5636 expressions = [primary] 5637 while self._match(TokenType.STRING): 5638 expressions.append(exp.Literal.string(self._prev.text)) 5639 5640 if len(expressions) > 1: 5641 return self.expression(exp.Concat, expressions=expressions) 5642 5643 return primary 5644 5645 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5646 return exp.Literal.number(f"0.{self._prev.text}") 5647 5648 if self._match(TokenType.L_PAREN): 5649 comments = self._prev_comments 5650 query = self._parse_select() 5651 5652 if query: 5653 expressions = [query] 5654 else: 5655 expressions = self._parse_expressions() 5656 5657 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5658 5659 if not this and self._match(TokenType.R_PAREN, advance=False): 5660 this = self.expression(exp.Tuple) 5661 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5662 this = self._parse_subquery(this=this, parse_alias=False) 5663 elif isinstance(this, exp.Subquery): 5664 this = self._parse_subquery( 5665 this=self._parse_set_operations(this), parse_alias=False 5666 ) 5667 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5668 this = self.expression(exp.Tuple, expressions=expressions) 5669 else: 5670 this = self.expression(exp.Paren, this=this) 5671 5672 if this: 5673 this.add_comments(comments) 5674 5675 self._match_r_paren(expression=this) 5676 return this 5677 5678 return None 5679 5680 def _parse_field( 5681 self, 5682 any_token: bool = False, 5683 tokens: t.Optional[t.Collection[TokenType]] = None, 5684 anonymous_func: bool = False, 5685 ) -> t.Optional[exp.Expression]: 5686 if anonymous_func: 5687 field = ( 5688 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5689 or self._parse_primary() 5690 ) 5691 else: 5692 field = self._parse_primary() or self._parse_function( 5693 anonymous=anonymous_func, any_token=any_token 5694 ) 5695 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5696 5697 def _parse_function( 5698 self, 5699 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5700 anonymous: bool = False, 5701 optional_parens: bool = True, 5702 any_token: bool = False, 5703 ) -> t.Optional[exp.Expression]: 5704 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5705 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5706 fn_syntax = False 5707 if ( 5708 self._match(TokenType.L_BRACE, advance=False) 5709 and self._next 5710 and self._next.text.upper() == "FN" 5711 ): 5712 self._advance(2) 5713 fn_syntax = True 5714 5715 func = self._parse_function_call( 5716 functions=functions, 5717 anonymous=anonymous, 5718 optional_parens=optional_parens, 5719 any_token=any_token, 5720 ) 5721 5722 if fn_syntax: 5723 self._match(TokenType.R_BRACE) 5724 5725 return func 5726 5727 def _parse_function_call( 5728 self, 5729 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5730 anonymous: bool = False, 5731 optional_parens: bool = True, 5732 any_token: bool = False, 5733 ) -> t.Optional[exp.Expression]: 5734 if not self._curr: 5735 return None 5736 5737 comments = self._curr.comments 5738 token = self._curr 5739 token_type = self._curr.token_type 5740 this = self._curr.text 5741 upper = this.upper() 5742 5743 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5744 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5745 self._advance() 5746 return self._parse_window(parser(self)) 5747 5748 if not self._next or self._next.token_type != TokenType.L_PAREN: 5749 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5750 self._advance() 5751 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5752 5753 return None 5754 5755 if any_token: 5756 if token_type in self.RESERVED_TOKENS: 5757 return None 5758 elif token_type not in self.FUNC_TOKENS: 5759 return None 5760 5761 self._advance(2) 5762 5763 parser = self.FUNCTION_PARSERS.get(upper) 5764 if parser and not anonymous: 5765 this = parser(self) 5766 else: 5767 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5768 5769 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5770 this = self.expression( 5771 subquery_predicate, comments=comments, this=self._parse_select() 5772 ) 5773 self._match_r_paren() 5774 return this 5775 5776 if functions is None: 5777 functions = self.FUNCTIONS 5778 5779 function = functions.get(upper) 5780 known_function = function and not anonymous 5781 5782 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5783 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5784 5785 post_func_comments = self._curr and self._curr.comments 5786 if known_function and post_func_comments: 5787 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5788 # call we'll construct it as exp.Anonymous, even if it's "known" 5789 if any( 5790 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5791 for comment in post_func_comments 5792 ): 5793 known_function = False 5794 5795 if alias and known_function: 5796 args = self._kv_to_prop_eq(args) 5797 5798 if known_function: 5799 func_builder = t.cast(t.Callable, function) 5800 5801 if "dialect" in func_builder.__code__.co_varnames: 5802 func = func_builder(args, dialect=self.dialect) 5803 else: 5804 func = func_builder(args) 5805 5806 func = self.validate_expression(func, args) 5807 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5808 func.meta["name"] = this 5809 5810 this = func 5811 else: 5812 if token_type == TokenType.IDENTIFIER: 5813 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5814 5815 this = self.expression(exp.Anonymous, this=this, expressions=args) 5816 this = this.update_positions(token) 5817 5818 if isinstance(this, exp.Expression): 5819 this.add_comments(comments) 5820 5821 self._match_r_paren(this) 5822 return self._parse_window(this) 5823 5824 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5825 return expression 5826 5827 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5828 transformed = [] 5829 5830 for index, e in enumerate(expressions): 5831 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5832 if isinstance(e, exp.Alias): 5833 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5834 5835 if not isinstance(e, exp.PropertyEQ): 5836 e = self.expression( 5837 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5838 ) 5839 5840 if isinstance(e.this, exp.Column): 5841 e.this.replace(e.this.this) 5842 else: 5843 e = self._to_prop_eq(e, index) 5844 5845 transformed.append(e) 5846 5847 return transformed 5848 5849 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5850 return self._parse_statement() 5851 5852 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5853 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5854 5855 def _parse_user_defined_function( 5856 self, kind: t.Optional[TokenType] = None 5857 ) -> t.Optional[exp.Expression]: 5858 this = self._parse_table_parts(schema=True) 5859 5860 if not self._match(TokenType.L_PAREN): 5861 return this 5862 5863 expressions = self._parse_csv(self._parse_function_parameter) 5864 self._match_r_paren() 5865 return self.expression( 5866 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5867 ) 5868 5869 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5870 literal = self._parse_primary() 5871 if literal: 5872 return self.expression(exp.Introducer, this=token.text, expression=literal) 5873 5874 return self._identifier_expression(token) 5875 5876 def _parse_session_parameter(self) -> exp.SessionParameter: 5877 kind = None 5878 this = self._parse_id_var() or self._parse_primary() 5879 5880 if this and self._match(TokenType.DOT): 5881 kind = this.name 5882 this = self._parse_var() or self._parse_primary() 5883 5884 return self.expression(exp.SessionParameter, this=this, kind=kind) 5885 5886 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5887 return self._parse_id_var() 5888 5889 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5890 index = self._index 5891 5892 if self._match(TokenType.L_PAREN): 5893 expressions = t.cast( 5894 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5895 ) 5896 5897 if not self._match(TokenType.R_PAREN): 5898 self._retreat(index) 5899 else: 5900 expressions = [self._parse_lambda_arg()] 5901 5902 if self._match_set(self.LAMBDAS): 5903 return self.LAMBDAS[self._prev.token_type](self, expressions) 5904 5905 self._retreat(index) 5906 5907 this: t.Optional[exp.Expression] 5908 5909 if self._match(TokenType.DISTINCT): 5910 this = self.expression( 5911 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5912 ) 5913 else: 5914 this = self._parse_select_or_expression(alias=alias) 5915 5916 return self._parse_limit( 5917 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5918 ) 5919 5920 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5921 index = self._index 5922 if not self._match(TokenType.L_PAREN): 5923 return this 5924 5925 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5926 # expr can be of both types 5927 if self._match_set(self.SELECT_START_TOKENS): 5928 self._retreat(index) 5929 return this 5930 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5931 self._match_r_paren() 5932 return self.expression(exp.Schema, this=this, expressions=args) 5933 5934 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5935 return self._parse_column_def(self._parse_field(any_token=True)) 5936 5937 def _parse_column_def( 5938 self, this: t.Optional[exp.Expression], computed_column: bool = True 5939 ) -> t.Optional[exp.Expression]: 5940 # column defs are not really columns, they're identifiers 5941 if isinstance(this, exp.Column): 5942 this = this.this 5943 5944 if not computed_column: 5945 self._match(TokenType.ALIAS) 5946 5947 kind = self._parse_types(schema=True) 5948 5949 if self._match_text_seq("FOR", "ORDINALITY"): 5950 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5951 5952 constraints: t.List[exp.Expression] = [] 5953 5954 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5955 ("ALIAS", "MATERIALIZED") 5956 ): 5957 persisted = self._prev.text.upper() == "MATERIALIZED" 5958 constraint_kind = exp.ComputedColumnConstraint( 5959 this=self._parse_assignment(), 5960 persisted=persisted or self._match_text_seq("PERSISTED"), 5961 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5962 ) 5963 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5964 elif ( 5965 kind 5966 and self._match(TokenType.ALIAS, advance=False) 5967 and ( 5968 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5969 or (self._next and self._next.token_type == TokenType.L_PAREN) 5970 ) 5971 ): 5972 self._advance() 5973 constraints.append( 5974 self.expression( 5975 exp.ColumnConstraint, 5976 kind=exp.ComputedColumnConstraint(this=self._parse_disjunction()), 5977 ) 5978 ) 5979 5980 while True: 5981 constraint = self._parse_column_constraint() 5982 if not constraint: 5983 break 5984 constraints.append(constraint) 5985 5986 if not kind and not constraints: 5987 return this 5988 5989 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5990 5991 def _parse_auto_increment( 5992 self, 5993 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5994 start = None 5995 increment = None 5996 5997 if self._match(TokenType.L_PAREN, advance=False): 5998 args = self._parse_wrapped_csv(self._parse_bitwise) 5999 start = seq_get(args, 0) 6000 increment = seq_get(args, 1) 6001 elif self._match_text_seq("START"): 6002 start = self._parse_bitwise() 6003 self._match_text_seq("INCREMENT") 6004 increment = self._parse_bitwise() 6005 6006 if start and increment: 6007 return exp.GeneratedAsIdentityColumnConstraint( 6008 start=start, increment=increment, this=False 6009 ) 6010 6011 return exp.AutoIncrementColumnConstraint() 6012 6013 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 6014 if not self._match_text_seq("REFRESH"): 6015 self._retreat(self._index - 1) 6016 return None 6017 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 6018 6019 def _parse_compress(self) -> exp.CompressColumnConstraint: 6020 if self._match(TokenType.L_PAREN, advance=False): 6021 return self.expression( 6022 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 6023 ) 6024 6025 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 6026 6027 def _parse_generated_as_identity( 6028 self, 6029 ) -> ( 6030 exp.GeneratedAsIdentityColumnConstraint 6031 | exp.ComputedColumnConstraint 6032 | exp.GeneratedAsRowColumnConstraint 6033 ): 6034 if self._match_text_seq("BY", "DEFAULT"): 6035 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 6036 this = self.expression( 6037 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 6038 ) 6039 else: 6040 self._match_text_seq("ALWAYS") 6041 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 6042 6043 self._match(TokenType.ALIAS) 6044 6045 if self._match_text_seq("ROW"): 6046 start = self._match_text_seq("START") 6047 if not start: 6048 self._match(TokenType.END) 6049 hidden = self._match_text_seq("HIDDEN") 6050 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 6051 6052 identity = self._match_text_seq("IDENTITY") 6053 6054 if self._match(TokenType.L_PAREN): 6055 if self._match(TokenType.START_WITH): 6056 this.set("start", self._parse_bitwise()) 6057 if self._match_text_seq("INCREMENT", "BY"): 6058 this.set("increment", self._parse_bitwise()) 6059 if self._match_text_seq("MINVALUE"): 6060 this.set("minvalue", self._parse_bitwise()) 6061 if self._match_text_seq("MAXVALUE"): 6062 this.set("maxvalue", self._parse_bitwise()) 6063 6064 if self._match_text_seq("CYCLE"): 6065 this.set("cycle", True) 6066 elif self._match_text_seq("NO", "CYCLE"): 6067 this.set("cycle", False) 6068 6069 if not identity: 6070 this.set("expression", self._parse_range()) 6071 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 6072 args = self._parse_csv(self._parse_bitwise) 6073 this.set("start", seq_get(args, 0)) 6074 this.set("increment", seq_get(args, 1)) 6075 6076 self._match_r_paren() 6077 6078 return this 6079 6080 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6081 self._match_text_seq("LENGTH") 6082 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6083 6084 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6085 if self._match_text_seq("NULL"): 6086 return self.expression(exp.NotNullColumnConstraint) 6087 if self._match_text_seq("CASESPECIFIC"): 6088 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6089 if self._match_text_seq("FOR", "REPLICATION"): 6090 return self.expression(exp.NotForReplicationColumnConstraint) 6091 6092 # Unconsume the `NOT` token 6093 self._retreat(self._index - 1) 6094 return None 6095 6096 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6097 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6098 6099 procedure_option_follows = ( 6100 self._match(TokenType.WITH, advance=False) 6101 and self._next 6102 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6103 ) 6104 6105 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6106 return self.expression( 6107 exp.ColumnConstraint, 6108 this=this, 6109 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6110 ) 6111 6112 return this 6113 6114 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6115 if not self._match(TokenType.CONSTRAINT): 6116 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6117 6118 return self.expression( 6119 exp.Constraint, 6120 this=self._parse_id_var(), 6121 expressions=self._parse_unnamed_constraints(), 6122 ) 6123 6124 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6125 constraints = [] 6126 while True: 6127 constraint = self._parse_unnamed_constraint() or self._parse_function() 6128 if not constraint: 6129 break 6130 constraints.append(constraint) 6131 6132 return constraints 6133 6134 def _parse_unnamed_constraint( 6135 self, constraints: t.Optional[t.Collection[str]] = None 6136 ) -> t.Optional[exp.Expression]: 6137 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6138 constraints or self.CONSTRAINT_PARSERS 6139 ): 6140 return None 6141 6142 constraint = self._prev.text.upper() 6143 if constraint not in self.CONSTRAINT_PARSERS: 6144 self.raise_error(f"No parser found for schema constraint {constraint}.") 6145 6146 return self.CONSTRAINT_PARSERS[constraint](self) 6147 6148 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6149 return self._parse_id_var(any_token=False) 6150 6151 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6152 self._match_text_seq("KEY") 6153 return self.expression( 6154 exp.UniqueColumnConstraint, 6155 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6156 this=self._parse_schema(self._parse_unique_key()), 6157 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6158 on_conflict=self._parse_on_conflict(), 6159 options=self._parse_key_constraint_options(), 6160 ) 6161 6162 def _parse_key_constraint_options(self) -> t.List[str]: 6163 options = [] 6164 while True: 6165 if not self._curr: 6166 break 6167 6168 if self._match(TokenType.ON): 6169 action = None 6170 on = self._advance_any() and self._prev.text 6171 6172 if self._match_text_seq("NO", "ACTION"): 6173 action = "NO ACTION" 6174 elif self._match_text_seq("CASCADE"): 6175 action = "CASCADE" 6176 elif self._match_text_seq("RESTRICT"): 6177 action = "RESTRICT" 6178 elif self._match_pair(TokenType.SET, TokenType.NULL): 6179 action = "SET NULL" 6180 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6181 action = "SET DEFAULT" 6182 else: 6183 self.raise_error("Invalid key constraint") 6184 6185 options.append(f"ON {on} {action}") 6186 else: 6187 var = self._parse_var_from_options( 6188 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6189 ) 6190 if not var: 6191 break 6192 options.append(var.name) 6193 6194 return options 6195 6196 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6197 if match and not self._match(TokenType.REFERENCES): 6198 return None 6199 6200 expressions = None 6201 this = self._parse_table(schema=True) 6202 options = self._parse_key_constraint_options() 6203 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6204 6205 def _parse_foreign_key(self) -> exp.ForeignKey: 6206 expressions = ( 6207 self._parse_wrapped_id_vars() 6208 if not self._match(TokenType.REFERENCES, advance=False) 6209 else None 6210 ) 6211 reference = self._parse_references() 6212 on_options = {} 6213 6214 while self._match(TokenType.ON): 6215 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6216 self.raise_error("Expected DELETE or UPDATE") 6217 6218 kind = self._prev.text.lower() 6219 6220 if self._match_text_seq("NO", "ACTION"): 6221 action = "NO ACTION" 6222 elif self._match(TokenType.SET): 6223 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6224 action = "SET " + self._prev.text.upper() 6225 else: 6226 self._advance() 6227 action = self._prev.text.upper() 6228 6229 on_options[kind] = action 6230 6231 return self.expression( 6232 exp.ForeignKey, 6233 expressions=expressions, 6234 reference=reference, 6235 options=self._parse_key_constraint_options(), 6236 **on_options, # type: ignore 6237 ) 6238 6239 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6240 return self._parse_ordered() or self._parse_field() 6241 6242 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6243 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6244 self._retreat(self._index - 1) 6245 return None 6246 6247 id_vars = self._parse_wrapped_id_vars() 6248 return self.expression( 6249 exp.PeriodForSystemTimeConstraint, 6250 this=seq_get(id_vars, 0), 6251 expression=seq_get(id_vars, 1), 6252 ) 6253 6254 def _parse_primary_key( 6255 self, wrapped_optional: bool = False, in_props: bool = False 6256 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6257 desc = ( 6258 self._match_set((TokenType.ASC, TokenType.DESC)) 6259 and self._prev.token_type == TokenType.DESC 6260 ) 6261 6262 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6263 return self.expression( 6264 exp.PrimaryKeyColumnConstraint, 6265 desc=desc, 6266 options=self._parse_key_constraint_options(), 6267 ) 6268 6269 expressions = self._parse_wrapped_csv( 6270 self._parse_primary_key_part, optional=wrapped_optional 6271 ) 6272 options = self._parse_key_constraint_options() 6273 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 6274 6275 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6276 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6277 6278 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6279 """ 6280 Parses a datetime column in ODBC format. We parse the column into the corresponding 6281 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6282 same as we did for `DATE('yyyy-mm-dd')`. 6283 6284 Reference: 6285 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6286 """ 6287 self._match(TokenType.VAR) 6288 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6289 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6290 if not self._match(TokenType.R_BRACE): 6291 self.raise_error("Expected }") 6292 return expression 6293 6294 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6295 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6296 return this 6297 6298 bracket_kind = self._prev.token_type 6299 if ( 6300 bracket_kind == TokenType.L_BRACE 6301 and self._curr 6302 and self._curr.token_type == TokenType.VAR 6303 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6304 ): 6305 return self._parse_odbc_datetime_literal() 6306 6307 expressions = self._parse_csv( 6308 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6309 ) 6310 6311 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6312 self.raise_error("Expected ]") 6313 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6314 self.raise_error("Expected }") 6315 6316 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6317 if bracket_kind == TokenType.L_BRACE: 6318 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 6319 elif not this: 6320 this = build_array_constructor( 6321 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6322 ) 6323 else: 6324 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6325 if constructor_type: 6326 return build_array_constructor( 6327 constructor_type, 6328 args=expressions, 6329 bracket_kind=bracket_kind, 6330 dialect=self.dialect, 6331 ) 6332 6333 expressions = apply_index_offset( 6334 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6335 ) 6336 this = self.expression( 6337 exp.Bracket, 6338 this=this, 6339 expressions=expressions, 6340 comments=this.pop_comments(), 6341 ) 6342 6343 self._add_comments(this) 6344 return self._parse_bracket(this) 6345 6346 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6347 if self._match(TokenType.COLON): 6348 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6349 return this 6350 6351 def _parse_case(self) -> t.Optional[exp.Expression]: 6352 ifs = [] 6353 default = None 6354 6355 comments = self._prev_comments 6356 expression = self._parse_assignment() 6357 6358 while self._match(TokenType.WHEN): 6359 this = self._parse_assignment() 6360 self._match(TokenType.THEN) 6361 then = self._parse_assignment() 6362 ifs.append(self.expression(exp.If, this=this, true=then)) 6363 6364 if self._match(TokenType.ELSE): 6365 default = self._parse_assignment() 6366 6367 if not self._match(TokenType.END): 6368 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6369 default = exp.column("interval") 6370 else: 6371 self.raise_error("Expected END after CASE", self._prev) 6372 6373 return self.expression( 6374 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6375 ) 6376 6377 def _parse_if(self) -> t.Optional[exp.Expression]: 6378 if self._match(TokenType.L_PAREN): 6379 args = self._parse_csv( 6380 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6381 ) 6382 this = self.validate_expression(exp.If.from_arg_list(args), args) 6383 self._match_r_paren() 6384 else: 6385 index = self._index - 1 6386 6387 if self.NO_PAREN_IF_COMMANDS and index == 0: 6388 return self._parse_as_command(self._prev) 6389 6390 condition = self._parse_assignment() 6391 6392 if not condition: 6393 self._retreat(index) 6394 return None 6395 6396 self._match(TokenType.THEN) 6397 true = self._parse_assignment() 6398 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6399 self._match(TokenType.END) 6400 this = self.expression(exp.If, this=condition, true=true, false=false) 6401 6402 return this 6403 6404 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6405 if not self._match_text_seq("VALUE", "FOR"): 6406 self._retreat(self._index - 1) 6407 return None 6408 6409 return self.expression( 6410 exp.NextValueFor, 6411 this=self._parse_column(), 6412 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6413 ) 6414 6415 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6416 this = self._parse_function() or self._parse_var_or_string(upper=True) 6417 6418 if self._match(TokenType.FROM): 6419 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6420 6421 if not self._match(TokenType.COMMA): 6422 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6423 6424 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6425 6426 def _parse_gap_fill(self) -> exp.GapFill: 6427 self._match(TokenType.TABLE) 6428 this = self._parse_table() 6429 6430 self._match(TokenType.COMMA) 6431 args = [this, *self._parse_csv(self._parse_lambda)] 6432 6433 gap_fill = exp.GapFill.from_arg_list(args) 6434 return self.validate_expression(gap_fill, args) 6435 6436 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6437 this = self._parse_assignment() 6438 6439 if not self._match(TokenType.ALIAS): 6440 if self._match(TokenType.COMMA): 6441 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6442 6443 self.raise_error("Expected AS after CAST") 6444 6445 fmt = None 6446 to = self._parse_types() 6447 6448 default = self._match(TokenType.DEFAULT) 6449 if default: 6450 default = self._parse_bitwise() 6451 self._match_text_seq("ON", "CONVERSION", "ERROR") 6452 6453 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6454 fmt_string = self._parse_string() 6455 fmt = self._parse_at_time_zone(fmt_string) 6456 6457 if not to: 6458 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6459 if to.this in exp.DataType.TEMPORAL_TYPES: 6460 this = self.expression( 6461 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6462 this=this, 6463 format=exp.Literal.string( 6464 format_time( 6465 fmt_string.this if fmt_string else "", 6466 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6467 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6468 ) 6469 ), 6470 safe=safe, 6471 ) 6472 6473 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6474 this.set("zone", fmt.args["zone"]) 6475 return this 6476 elif not to: 6477 self.raise_error("Expected TYPE after CAST") 6478 elif isinstance(to, exp.Identifier): 6479 to = exp.DataType.build(to.name, udt=True) 6480 elif to.this == exp.DataType.Type.CHAR: 6481 if self._match(TokenType.CHARACTER_SET): 6482 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6483 6484 return self.expression( 6485 exp.Cast if strict else exp.TryCast, 6486 this=this, 6487 to=to, 6488 format=fmt, 6489 safe=safe, 6490 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6491 default=default, 6492 ) 6493 6494 def _parse_string_agg(self) -> exp.GroupConcat: 6495 if self._match(TokenType.DISTINCT): 6496 args: t.List[t.Optional[exp.Expression]] = [ 6497 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6498 ] 6499 if self._match(TokenType.COMMA): 6500 args.extend(self._parse_csv(self._parse_assignment)) 6501 else: 6502 args = self._parse_csv(self._parse_assignment) # type: ignore 6503 6504 if self._match_text_seq("ON", "OVERFLOW"): 6505 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6506 if self._match_text_seq("ERROR"): 6507 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6508 else: 6509 self._match_text_seq("TRUNCATE") 6510 on_overflow = self.expression( 6511 exp.OverflowTruncateBehavior, 6512 this=self._parse_string(), 6513 with_count=( 6514 self._match_text_seq("WITH", "COUNT") 6515 or not self._match_text_seq("WITHOUT", "COUNT") 6516 ), 6517 ) 6518 else: 6519 on_overflow = None 6520 6521 index = self._index 6522 if not self._match(TokenType.R_PAREN) and args: 6523 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6524 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6525 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6526 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6527 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6528 6529 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6530 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6531 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 6532 if not self._match_text_seq("WITHIN", "GROUP"): 6533 self._retreat(index) 6534 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6535 6536 # The corresponding match_r_paren will be called in parse_function (caller) 6537 self._match_l_paren() 6538 6539 return self.expression( 6540 exp.GroupConcat, 6541 this=self._parse_order(this=seq_get(args, 0)), 6542 separator=seq_get(args, 1), 6543 on_overflow=on_overflow, 6544 ) 6545 6546 def _parse_convert( 6547 self, strict: bool, safe: t.Optional[bool] = None 6548 ) -> t.Optional[exp.Expression]: 6549 this = self._parse_bitwise() 6550 6551 if self._match(TokenType.USING): 6552 to: t.Optional[exp.Expression] = self.expression( 6553 exp.CharacterSet, this=self._parse_var() 6554 ) 6555 elif self._match(TokenType.COMMA): 6556 to = self._parse_types() 6557 else: 6558 to = None 6559 6560 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6561 6562 def _parse_xml_table(self) -> exp.XMLTable: 6563 namespaces = None 6564 passing = None 6565 columns = None 6566 6567 if self._match_text_seq("XMLNAMESPACES", "("): 6568 namespaces = self._parse_xml_namespace() 6569 self._match_text_seq(")", ",") 6570 6571 this = self._parse_string() 6572 6573 if self._match_text_seq("PASSING"): 6574 # The BY VALUE keywords are optional and are provided for semantic clarity 6575 self._match_text_seq("BY", "VALUE") 6576 passing = self._parse_csv(self._parse_column) 6577 6578 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6579 6580 if self._match_text_seq("COLUMNS"): 6581 columns = self._parse_csv(self._parse_field_def) 6582 6583 return self.expression( 6584 exp.XMLTable, 6585 this=this, 6586 namespaces=namespaces, 6587 passing=passing, 6588 columns=columns, 6589 by_ref=by_ref, 6590 ) 6591 6592 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6593 namespaces = [] 6594 6595 while True: 6596 if self._match(TokenType.DEFAULT): 6597 uri = self._parse_string() 6598 else: 6599 uri = self._parse_alias(self._parse_string()) 6600 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6601 if not self._match(TokenType.COMMA): 6602 break 6603 6604 return namespaces 6605 6606 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6607 """ 6608 There are generally two variants of the DECODE function: 6609 6610 - DECODE(bin, charset) 6611 - DECODE(expression, search, result [, search, result] ... [, default]) 6612 6613 The second variant will always be parsed into a CASE expression. Note that NULL 6614 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6615 instead of relying on pattern matching. 6616 """ 6617 args = self._parse_csv(self._parse_assignment) 6618 6619 if len(args) < 3: 6620 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6621 6622 expression, *expressions = args 6623 if not expression: 6624 return None 6625 6626 ifs = [] 6627 for search, result in zip(expressions[::2], expressions[1::2]): 6628 if not search or not result: 6629 return None 6630 6631 if isinstance(search, exp.Literal): 6632 ifs.append( 6633 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6634 ) 6635 elif isinstance(search, exp.Null): 6636 ifs.append( 6637 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6638 ) 6639 else: 6640 cond = exp.or_( 6641 exp.EQ(this=expression.copy(), expression=search), 6642 exp.and_( 6643 exp.Is(this=expression.copy(), expression=exp.Null()), 6644 exp.Is(this=search.copy(), expression=exp.Null()), 6645 copy=False, 6646 ), 6647 copy=False, 6648 ) 6649 ifs.append(exp.If(this=cond, true=result)) 6650 6651 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6652 6653 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6654 self._match_text_seq("KEY") 6655 key = self._parse_column() 6656 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6657 self._match_text_seq("VALUE") 6658 value = self._parse_bitwise() 6659 6660 if not key and not value: 6661 return None 6662 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6663 6664 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6665 if not this or not self._match_text_seq("FORMAT", "JSON"): 6666 return this 6667 6668 return self.expression(exp.FormatJson, this=this) 6669 6670 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6671 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6672 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6673 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6674 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6675 else: 6676 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6677 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6678 6679 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6680 6681 if not empty and not error and not null: 6682 return None 6683 6684 return self.expression( 6685 exp.OnCondition, 6686 empty=empty, 6687 error=error, 6688 null=null, 6689 ) 6690 6691 def _parse_on_handling( 6692 self, on: str, *values: str 6693 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6694 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6695 for value in values: 6696 if self._match_text_seq(value, "ON", on): 6697 return f"{value} ON {on}" 6698 6699 index = self._index 6700 if self._match(TokenType.DEFAULT): 6701 default_value = self._parse_bitwise() 6702 if self._match_text_seq("ON", on): 6703 return default_value 6704 6705 self._retreat(index) 6706 6707 return None 6708 6709 @t.overload 6710 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6711 6712 @t.overload 6713 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 6714 6715 def _parse_json_object(self, agg=False): 6716 star = self._parse_star() 6717 expressions = ( 6718 [star] 6719 if star 6720 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6721 ) 6722 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6723 6724 unique_keys = None 6725 if self._match_text_seq("WITH", "UNIQUE"): 6726 unique_keys = True 6727 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6728 unique_keys = False 6729 6730 self._match_text_seq("KEYS") 6731 6732 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6733 self._parse_type() 6734 ) 6735 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6736 6737 return self.expression( 6738 exp.JSONObjectAgg if agg else exp.JSONObject, 6739 expressions=expressions, 6740 null_handling=null_handling, 6741 unique_keys=unique_keys, 6742 return_type=return_type, 6743 encoding=encoding, 6744 ) 6745 6746 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6747 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6748 if not self._match_text_seq("NESTED"): 6749 this = self._parse_id_var() 6750 kind = self._parse_types(allow_identifiers=False) 6751 nested = None 6752 else: 6753 this = None 6754 kind = None 6755 nested = True 6756 6757 path = self._match_text_seq("PATH") and self._parse_string() 6758 nested_schema = nested and self._parse_json_schema() 6759 6760 return self.expression( 6761 exp.JSONColumnDef, 6762 this=this, 6763 kind=kind, 6764 path=path, 6765 nested_schema=nested_schema, 6766 ) 6767 6768 def _parse_json_schema(self) -> exp.JSONSchema: 6769 self._match_text_seq("COLUMNS") 6770 return self.expression( 6771 exp.JSONSchema, 6772 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6773 ) 6774 6775 def _parse_json_table(self) -> exp.JSONTable: 6776 this = self._parse_format_json(self._parse_bitwise()) 6777 path = self._match(TokenType.COMMA) and self._parse_string() 6778 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6779 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6780 schema = self._parse_json_schema() 6781 6782 return exp.JSONTable( 6783 this=this, 6784 schema=schema, 6785 path=path, 6786 error_handling=error_handling, 6787 empty_handling=empty_handling, 6788 ) 6789 6790 def _parse_match_against(self) -> exp.MatchAgainst: 6791 expressions = self._parse_csv(self._parse_column) 6792 6793 self._match_text_seq(")", "AGAINST", "(") 6794 6795 this = self._parse_string() 6796 6797 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6798 modifier = "IN NATURAL LANGUAGE MODE" 6799 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6800 modifier = f"{modifier} WITH QUERY EXPANSION" 6801 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6802 modifier = "IN BOOLEAN MODE" 6803 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6804 modifier = "WITH QUERY EXPANSION" 6805 else: 6806 modifier = None 6807 6808 return self.expression( 6809 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6810 ) 6811 6812 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6813 def _parse_open_json(self) -> exp.OpenJSON: 6814 this = self._parse_bitwise() 6815 path = self._match(TokenType.COMMA) and self._parse_string() 6816 6817 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6818 this = self._parse_field(any_token=True) 6819 kind = self._parse_types() 6820 path = self._parse_string() 6821 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6822 6823 return self.expression( 6824 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6825 ) 6826 6827 expressions = None 6828 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6829 self._match_l_paren() 6830 expressions = self._parse_csv(_parse_open_json_column_def) 6831 6832 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6833 6834 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6835 args = self._parse_csv(self._parse_bitwise) 6836 6837 if self._match(TokenType.IN): 6838 return self.expression( 6839 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6840 ) 6841 6842 if haystack_first: 6843 haystack = seq_get(args, 0) 6844 needle = seq_get(args, 1) 6845 else: 6846 haystack = seq_get(args, 1) 6847 needle = seq_get(args, 0) 6848 6849 return self.expression( 6850 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6851 ) 6852 6853 def _parse_predict(self) -> exp.Predict: 6854 self._match_text_seq("MODEL") 6855 this = self._parse_table() 6856 6857 self._match(TokenType.COMMA) 6858 self._match_text_seq("TABLE") 6859 6860 return self.expression( 6861 exp.Predict, 6862 this=this, 6863 expression=self._parse_table(), 6864 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6865 ) 6866 6867 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6868 args = self._parse_csv(self._parse_table) 6869 return exp.JoinHint(this=func_name.upper(), expressions=args) 6870 6871 def _parse_substring(self) -> exp.Substring: 6872 # Postgres supports the form: substring(string [from int] [for int]) 6873 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6874 6875 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6876 6877 if self._match(TokenType.FROM): 6878 args.append(self._parse_bitwise()) 6879 if self._match(TokenType.FOR): 6880 if len(args) == 1: 6881 args.append(exp.Literal.number(1)) 6882 args.append(self._parse_bitwise()) 6883 6884 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6885 6886 def _parse_trim(self) -> exp.Trim: 6887 # https://www.w3resource.com/sql/character-functions/trim.php 6888 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6889 6890 position = None 6891 collation = None 6892 expression = None 6893 6894 if self._match_texts(self.TRIM_TYPES): 6895 position = self._prev.text.upper() 6896 6897 this = self._parse_bitwise() 6898 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6899 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6900 expression = self._parse_bitwise() 6901 6902 if invert_order: 6903 this, expression = expression, this 6904 6905 if self._match(TokenType.COLLATE): 6906 collation = self._parse_bitwise() 6907 6908 return self.expression( 6909 exp.Trim, this=this, position=position, expression=expression, collation=collation 6910 ) 6911 6912 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6913 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6914 6915 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6916 return self._parse_window(self._parse_id_var(), alias=True) 6917 6918 def _parse_respect_or_ignore_nulls( 6919 self, this: t.Optional[exp.Expression] 6920 ) -> t.Optional[exp.Expression]: 6921 if self._match_text_seq("IGNORE", "NULLS"): 6922 return self.expression(exp.IgnoreNulls, this=this) 6923 if self._match_text_seq("RESPECT", "NULLS"): 6924 return self.expression(exp.RespectNulls, this=this) 6925 return this 6926 6927 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6928 if self._match(TokenType.HAVING): 6929 self._match_texts(("MAX", "MIN")) 6930 max = self._prev.text.upper() != "MIN" 6931 return self.expression( 6932 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6933 ) 6934 6935 return this 6936 6937 def _parse_window( 6938 self, this: t.Optional[exp.Expression], alias: bool = False 6939 ) -> t.Optional[exp.Expression]: 6940 func = this 6941 comments = func.comments if isinstance(func, exp.Expression) else None 6942 6943 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6944 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6945 if self._match_text_seq("WITHIN", "GROUP"): 6946 order = self._parse_wrapped(self._parse_order) 6947 this = self.expression(exp.WithinGroup, this=this, expression=order) 6948 6949 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6950 self._match(TokenType.WHERE) 6951 this = self.expression( 6952 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6953 ) 6954 self._match_r_paren() 6955 6956 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6957 # Some dialects choose to implement and some do not. 6958 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6959 6960 # There is some code above in _parse_lambda that handles 6961 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6962 6963 # The below changes handle 6964 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6965 6966 # Oracle allows both formats 6967 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6968 # and Snowflake chose to do the same for familiarity 6969 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6970 if isinstance(this, exp.AggFunc): 6971 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6972 6973 if ignore_respect and ignore_respect is not this: 6974 ignore_respect.replace(ignore_respect.this) 6975 this = self.expression(ignore_respect.__class__, this=this) 6976 6977 this = self._parse_respect_or_ignore_nulls(this) 6978 6979 # bigquery select from window x AS (partition by ...) 6980 if alias: 6981 over = None 6982 self._match(TokenType.ALIAS) 6983 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6984 return this 6985 else: 6986 over = self._prev.text.upper() 6987 6988 if comments and isinstance(func, exp.Expression): 6989 func.pop_comments() 6990 6991 if not self._match(TokenType.L_PAREN): 6992 return self.expression( 6993 exp.Window, 6994 comments=comments, 6995 this=this, 6996 alias=self._parse_id_var(False), 6997 over=over, 6998 ) 6999 7000 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 7001 7002 first = self._match(TokenType.FIRST) 7003 if self._match_text_seq("LAST"): 7004 first = False 7005 7006 partition, order = self._parse_partition_and_order() 7007 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 7008 7009 if kind: 7010 self._match(TokenType.BETWEEN) 7011 start = self._parse_window_spec() 7012 self._match(TokenType.AND) 7013 end = self._parse_window_spec() 7014 exclude = ( 7015 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 7016 if self._match_text_seq("EXCLUDE") 7017 else None 7018 ) 7019 7020 spec = self.expression( 7021 exp.WindowSpec, 7022 kind=kind, 7023 start=start["value"], 7024 start_side=start["side"], 7025 end=end["value"], 7026 end_side=end["side"], 7027 exclude=exclude, 7028 ) 7029 else: 7030 spec = None 7031 7032 self._match_r_paren() 7033 7034 window = self.expression( 7035 exp.Window, 7036 comments=comments, 7037 this=this, 7038 partition_by=partition, 7039 order=order, 7040 spec=spec, 7041 alias=window_alias, 7042 over=over, 7043 first=first, 7044 ) 7045 7046 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 7047 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 7048 return self._parse_window(window, alias=alias) 7049 7050 return window 7051 7052 def _parse_partition_and_order( 7053 self, 7054 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 7055 return self._parse_partition_by(), self._parse_order() 7056 7057 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 7058 self._match(TokenType.BETWEEN) 7059 7060 return { 7061 "value": ( 7062 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 7063 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 7064 or self._parse_bitwise() 7065 ), 7066 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 7067 } 7068 7069 def _parse_alias( 7070 self, this: t.Optional[exp.Expression], explicit: bool = False 7071 ) -> t.Optional[exp.Expression]: 7072 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 7073 # so this section tries to parse the clause version and if it fails, it treats the token 7074 # as an identifier (alias) 7075 if self._can_parse_limit_or_offset(): 7076 return this 7077 7078 any_token = self._match(TokenType.ALIAS) 7079 comments = self._prev_comments or [] 7080 7081 if explicit and not any_token: 7082 return this 7083 7084 if self._match(TokenType.L_PAREN): 7085 aliases = self.expression( 7086 exp.Aliases, 7087 comments=comments, 7088 this=this, 7089 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 7090 ) 7091 self._match_r_paren(aliases) 7092 return aliases 7093 7094 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 7095 self.STRING_ALIASES and self._parse_string_as_identifier() 7096 ) 7097 7098 if alias: 7099 comments.extend(alias.pop_comments()) 7100 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 7101 column = this.this 7102 7103 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7104 if not this.comments and column and column.comments: 7105 this.comments = column.pop_comments() 7106 7107 return this 7108 7109 def _parse_id_var( 7110 self, 7111 any_token: bool = True, 7112 tokens: t.Optional[t.Collection[TokenType]] = None, 7113 ) -> t.Optional[exp.Expression]: 7114 expression = self._parse_identifier() 7115 if not expression and ( 7116 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7117 ): 7118 quoted = self._prev.token_type == TokenType.STRING 7119 expression = self._identifier_expression(quoted=quoted) 7120 7121 return expression 7122 7123 def _parse_string(self) -> t.Optional[exp.Expression]: 7124 if self._match_set(self.STRING_PARSERS): 7125 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7126 return self._parse_placeholder() 7127 7128 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7129 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7130 if output: 7131 output.update_positions(self._prev) 7132 return output 7133 7134 def _parse_number(self) -> t.Optional[exp.Expression]: 7135 if self._match_set(self.NUMERIC_PARSERS): 7136 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7137 return self._parse_placeholder() 7138 7139 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7140 if self._match(TokenType.IDENTIFIER): 7141 return self._identifier_expression(quoted=True) 7142 return self._parse_placeholder() 7143 7144 def _parse_var( 7145 self, 7146 any_token: bool = False, 7147 tokens: t.Optional[t.Collection[TokenType]] = None, 7148 upper: bool = False, 7149 ) -> t.Optional[exp.Expression]: 7150 if ( 7151 (any_token and self._advance_any()) 7152 or self._match(TokenType.VAR) 7153 or (self._match_set(tokens) if tokens else False) 7154 ): 7155 return self.expression( 7156 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7157 ) 7158 return self._parse_placeholder() 7159 7160 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7161 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7162 self._advance() 7163 return self._prev 7164 return None 7165 7166 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7167 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7168 7169 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7170 return self._parse_primary() or self._parse_var(any_token=True) 7171 7172 def _parse_null(self) -> t.Optional[exp.Expression]: 7173 if self._match_set(self.NULL_TOKENS): 7174 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7175 return self._parse_placeholder() 7176 7177 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7178 if self._match(TokenType.TRUE): 7179 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7180 if self._match(TokenType.FALSE): 7181 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7182 return self._parse_placeholder() 7183 7184 def _parse_star(self) -> t.Optional[exp.Expression]: 7185 if self._match(TokenType.STAR): 7186 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7187 return self._parse_placeholder() 7188 7189 def _parse_parameter(self) -> exp.Parameter: 7190 this = self._parse_identifier() or self._parse_primary_or_var() 7191 return self.expression(exp.Parameter, this=this) 7192 7193 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7194 if self._match_set(self.PLACEHOLDER_PARSERS): 7195 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7196 if placeholder: 7197 return placeholder 7198 self._advance(-1) 7199 return None 7200 7201 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7202 if not self._match_texts(keywords): 7203 return None 7204 if self._match(TokenType.L_PAREN, advance=False): 7205 return self._parse_wrapped_csv(self._parse_expression) 7206 7207 expression = self._parse_expression() 7208 return [expression] if expression else None 7209 7210 def _parse_csv( 7211 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7212 ) -> t.List[exp.Expression]: 7213 parse_result = parse_method() 7214 items = [parse_result] if parse_result is not None else [] 7215 7216 while self._match(sep): 7217 self._add_comments(parse_result) 7218 parse_result = parse_method() 7219 if parse_result is not None: 7220 items.append(parse_result) 7221 7222 return items 7223 7224 def _parse_tokens( 7225 self, parse_method: t.Callable, expressions: t.Dict 7226 ) -> t.Optional[exp.Expression]: 7227 this = parse_method() 7228 7229 while self._match_set(expressions): 7230 this = self.expression( 7231 expressions[self._prev.token_type], 7232 this=this, 7233 comments=self._prev_comments, 7234 expression=parse_method(), 7235 ) 7236 7237 return this 7238 7239 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 7240 while self._match(TokenType.PIPE_GT): 7241 start = self._curr 7242 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 7243 if not parser: 7244 set_op_query = self._parse_pipe_syntax_set_operator(query) 7245 if not set_op_query: 7246 self._retreat(start) 7247 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 7248 break 7249 7250 query = set_op_query 7251 else: 7252 query = parser(self, query) 7253 7254 return query 7255 7256 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7257 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7258 7259 def _parse_wrapped_csv( 7260 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7261 ) -> t.List[exp.Expression]: 7262 return self._parse_wrapped( 7263 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7264 ) 7265 7266 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7267 wrapped = self._match(TokenType.L_PAREN) 7268 if not wrapped and not optional: 7269 self.raise_error("Expecting (") 7270 parse_result = parse_method() 7271 if wrapped: 7272 self._match_r_paren() 7273 return parse_result 7274 7275 def _parse_expressions(self) -> t.List[exp.Expression]: 7276 return self._parse_csv(self._parse_expression) 7277 7278 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7279 return self._parse_select() or self._parse_set_operations( 7280 self._parse_alias(self._parse_assignment(), explicit=True) 7281 if alias 7282 else self._parse_assignment() 7283 ) 7284 7285 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7286 return self._parse_query_modifiers( 7287 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7288 ) 7289 7290 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7291 this = None 7292 if self._match_texts(self.TRANSACTION_KIND): 7293 this = self._prev.text 7294 7295 self._match_texts(("TRANSACTION", "WORK")) 7296 7297 modes = [] 7298 while True: 7299 mode = [] 7300 while self._match(TokenType.VAR): 7301 mode.append(self._prev.text) 7302 7303 if mode: 7304 modes.append(" ".join(mode)) 7305 if not self._match(TokenType.COMMA): 7306 break 7307 7308 return self.expression(exp.Transaction, this=this, modes=modes) 7309 7310 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7311 chain = None 7312 savepoint = None 7313 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7314 7315 self._match_texts(("TRANSACTION", "WORK")) 7316 7317 if self._match_text_seq("TO"): 7318 self._match_text_seq("SAVEPOINT") 7319 savepoint = self._parse_id_var() 7320 7321 if self._match(TokenType.AND): 7322 chain = not self._match_text_seq("NO") 7323 self._match_text_seq("CHAIN") 7324 7325 if is_rollback: 7326 return self.expression(exp.Rollback, savepoint=savepoint) 7327 7328 return self.expression(exp.Commit, chain=chain) 7329 7330 def _parse_refresh(self) -> exp.Refresh: 7331 self._match(TokenType.TABLE) 7332 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7333 7334 def _parse_add_column(self) -> t.Optional[exp.Expression]: 7335 if not self._prev.text.upper() == "ADD": 7336 return None 7337 7338 self._match(TokenType.COLUMN) 7339 exists_column = self._parse_exists(not_=True) 7340 expression = self._parse_field_def() 7341 7342 if expression: 7343 expression.set("exists", exists_column) 7344 7345 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7346 if self._match_texts(("FIRST", "AFTER")): 7347 position = self._prev.text 7348 column_position = self.expression( 7349 exp.ColumnPosition, this=self._parse_column(), position=position 7350 ) 7351 expression.set("position", column_position) 7352 7353 return expression 7354 7355 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7356 drop = self._match(TokenType.DROP) and self._parse_drop() 7357 if drop and not isinstance(drop, exp.Command): 7358 drop.set("kind", drop.args.get("kind", "COLUMN")) 7359 return drop 7360 7361 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7362 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7363 return self.expression( 7364 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7365 ) 7366 7367 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7368 def _parse_add_column_or_constraint(): 7369 self._match_text_seq("ADD") 7370 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7371 return self.expression( 7372 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7373 ) 7374 return self._parse_add_column() 7375 7376 if not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN or self._match_text_seq( 7377 "COLUMNS" 7378 ): 7379 schema = self._parse_schema() 7380 7381 return ensure_list(schema) if schema else self._parse_csv(self._parse_field_def) 7382 7383 return self._parse_csv(_parse_add_column_or_constraint) 7384 7385 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7386 if self._match_texts(self.ALTER_ALTER_PARSERS): 7387 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7388 7389 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7390 # keyword after ALTER we default to parsing this statement 7391 self._match(TokenType.COLUMN) 7392 column = self._parse_field(any_token=True) 7393 7394 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7395 return self.expression(exp.AlterColumn, this=column, drop=True) 7396 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7397 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7398 if self._match(TokenType.COMMENT): 7399 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7400 if self._match_text_seq("DROP", "NOT", "NULL"): 7401 return self.expression( 7402 exp.AlterColumn, 7403 this=column, 7404 drop=True, 7405 allow_null=True, 7406 ) 7407 if self._match_text_seq("SET", "NOT", "NULL"): 7408 return self.expression( 7409 exp.AlterColumn, 7410 this=column, 7411 allow_null=False, 7412 ) 7413 7414 if self._match_text_seq("SET", "VISIBLE"): 7415 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7416 if self._match_text_seq("SET", "INVISIBLE"): 7417 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7418 7419 self._match_text_seq("SET", "DATA") 7420 self._match_text_seq("TYPE") 7421 return self.expression( 7422 exp.AlterColumn, 7423 this=column, 7424 dtype=self._parse_types(), 7425 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7426 using=self._match(TokenType.USING) and self._parse_assignment(), 7427 ) 7428 7429 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7430 if self._match_texts(("ALL", "EVEN", "AUTO")): 7431 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7432 7433 self._match_text_seq("KEY", "DISTKEY") 7434 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7435 7436 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7437 if compound: 7438 self._match_text_seq("SORTKEY") 7439 7440 if self._match(TokenType.L_PAREN, advance=False): 7441 return self.expression( 7442 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7443 ) 7444 7445 self._match_texts(("AUTO", "NONE")) 7446 return self.expression( 7447 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7448 ) 7449 7450 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7451 index = self._index - 1 7452 7453 partition_exists = self._parse_exists() 7454 if self._match(TokenType.PARTITION, advance=False): 7455 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7456 7457 self._retreat(index) 7458 return self._parse_csv(self._parse_drop_column) 7459 7460 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7461 if self._match(TokenType.COLUMN): 7462 exists = self._parse_exists() 7463 old_column = self._parse_column() 7464 to = self._match_text_seq("TO") 7465 new_column = self._parse_column() 7466 7467 if old_column is None or to is None or new_column is None: 7468 return None 7469 7470 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7471 7472 self._match_text_seq("TO") 7473 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7474 7475 def _parse_alter_table_set(self) -> exp.AlterSet: 7476 alter_set = self.expression(exp.AlterSet) 7477 7478 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7479 "TABLE", "PROPERTIES" 7480 ): 7481 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7482 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7483 alter_set.set("expressions", [self._parse_assignment()]) 7484 elif self._match_texts(("LOGGED", "UNLOGGED")): 7485 alter_set.set("option", exp.var(self._prev.text.upper())) 7486 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7487 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7488 elif self._match_text_seq("LOCATION"): 7489 alter_set.set("location", self._parse_field()) 7490 elif self._match_text_seq("ACCESS", "METHOD"): 7491 alter_set.set("access_method", self._parse_field()) 7492 elif self._match_text_seq("TABLESPACE"): 7493 alter_set.set("tablespace", self._parse_field()) 7494 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7495 alter_set.set("file_format", [self._parse_field()]) 7496 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7497 alter_set.set("file_format", self._parse_wrapped_options()) 7498 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7499 alter_set.set("copy_options", self._parse_wrapped_options()) 7500 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7501 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7502 else: 7503 if self._match_text_seq("SERDE"): 7504 alter_set.set("serde", self._parse_field()) 7505 7506 properties = self._parse_wrapped(self._parse_properties, optional=True) 7507 alter_set.set("expressions", [properties]) 7508 7509 return alter_set 7510 7511 def _parse_alter(self) -> exp.Alter | exp.Command: 7512 start = self._prev 7513 7514 alter_token = self._match_set(self.ALTERABLES) and self._prev 7515 if not alter_token: 7516 return self._parse_as_command(start) 7517 7518 exists = self._parse_exists() 7519 only = self._match_text_seq("ONLY") 7520 this = self._parse_table(schema=True) 7521 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7522 7523 if self._next: 7524 self._advance() 7525 7526 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7527 if parser: 7528 actions = ensure_list(parser(self)) 7529 not_valid = self._match_text_seq("NOT", "VALID") 7530 options = self._parse_csv(self._parse_property) 7531 7532 if not self._curr and actions: 7533 return self.expression( 7534 exp.Alter, 7535 this=this, 7536 kind=alter_token.text.upper(), 7537 exists=exists, 7538 actions=actions, 7539 only=only, 7540 options=options, 7541 cluster=cluster, 7542 not_valid=not_valid, 7543 ) 7544 7545 return self._parse_as_command(start) 7546 7547 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7548 start = self._prev 7549 # https://duckdb.org/docs/sql/statements/analyze 7550 if not self._curr: 7551 return self.expression(exp.Analyze) 7552 7553 options = [] 7554 while self._match_texts(self.ANALYZE_STYLES): 7555 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7556 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7557 else: 7558 options.append(self._prev.text.upper()) 7559 7560 this: t.Optional[exp.Expression] = None 7561 inner_expression: t.Optional[exp.Expression] = None 7562 7563 kind = self._curr and self._curr.text.upper() 7564 7565 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7566 this = self._parse_table_parts() 7567 elif self._match_text_seq("TABLES"): 7568 if self._match_set((TokenType.FROM, TokenType.IN)): 7569 kind = f"{kind} {self._prev.text.upper()}" 7570 this = self._parse_table(schema=True, is_db_reference=True) 7571 elif self._match_text_seq("DATABASE"): 7572 this = self._parse_table(schema=True, is_db_reference=True) 7573 elif self._match_text_seq("CLUSTER"): 7574 this = self._parse_table() 7575 # Try matching inner expr keywords before fallback to parse table. 7576 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7577 kind = None 7578 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7579 else: 7580 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7581 kind = None 7582 this = self._parse_table_parts() 7583 7584 partition = self._try_parse(self._parse_partition) 7585 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7586 return self._parse_as_command(start) 7587 7588 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7589 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7590 "WITH", "ASYNC", "MODE" 7591 ): 7592 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7593 else: 7594 mode = None 7595 7596 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7597 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7598 7599 properties = self._parse_properties() 7600 return self.expression( 7601 exp.Analyze, 7602 kind=kind, 7603 this=this, 7604 mode=mode, 7605 partition=partition, 7606 properties=properties, 7607 expression=inner_expression, 7608 options=options, 7609 ) 7610 7611 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7612 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7613 this = None 7614 kind = self._prev.text.upper() 7615 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7616 expressions = [] 7617 7618 if not self._match_text_seq("STATISTICS"): 7619 self.raise_error("Expecting token STATISTICS") 7620 7621 if self._match_text_seq("NOSCAN"): 7622 this = "NOSCAN" 7623 elif self._match(TokenType.FOR): 7624 if self._match_text_seq("ALL", "COLUMNS"): 7625 this = "FOR ALL COLUMNS" 7626 if self._match_texts("COLUMNS"): 7627 this = "FOR COLUMNS" 7628 expressions = self._parse_csv(self._parse_column_reference) 7629 elif self._match_text_seq("SAMPLE"): 7630 sample = self._parse_number() 7631 expressions = [ 7632 self.expression( 7633 exp.AnalyzeSample, 7634 sample=sample, 7635 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7636 ) 7637 ] 7638 7639 return self.expression( 7640 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7641 ) 7642 7643 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7644 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7645 kind = None 7646 this = None 7647 expression: t.Optional[exp.Expression] = None 7648 if self._match_text_seq("REF", "UPDATE"): 7649 kind = "REF" 7650 this = "UPDATE" 7651 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7652 this = "UPDATE SET DANGLING TO NULL" 7653 elif self._match_text_seq("STRUCTURE"): 7654 kind = "STRUCTURE" 7655 if self._match_text_seq("CASCADE", "FAST"): 7656 this = "CASCADE FAST" 7657 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7658 ("ONLINE", "OFFLINE") 7659 ): 7660 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7661 expression = self._parse_into() 7662 7663 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7664 7665 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7666 this = self._prev.text.upper() 7667 if self._match_text_seq("COLUMNS"): 7668 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7669 return None 7670 7671 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7672 kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7673 if self._match_text_seq("STATISTICS"): 7674 return self.expression(exp.AnalyzeDelete, kind=kind) 7675 return None 7676 7677 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7678 if self._match_text_seq("CHAINED", "ROWS"): 7679 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7680 return None 7681 7682 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7683 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7684 this = self._prev.text.upper() 7685 expression: t.Optional[exp.Expression] = None 7686 expressions = [] 7687 update_options = None 7688 7689 if self._match_text_seq("HISTOGRAM", "ON"): 7690 expressions = self._parse_csv(self._parse_column_reference) 7691 with_expressions = [] 7692 while self._match(TokenType.WITH): 7693 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7694 if self._match_texts(("SYNC", "ASYNC")): 7695 if self._match_text_seq("MODE", advance=False): 7696 with_expressions.append(f"{self._prev.text.upper()} MODE") 7697 self._advance() 7698 else: 7699 buckets = self._parse_number() 7700 if self._match_text_seq("BUCKETS"): 7701 with_expressions.append(f"{buckets} BUCKETS") 7702 if with_expressions: 7703 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7704 7705 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7706 TokenType.UPDATE, advance=False 7707 ): 7708 update_options = self._prev.text.upper() 7709 self._advance() 7710 elif self._match_text_seq("USING", "DATA"): 7711 expression = self.expression(exp.UsingData, this=self._parse_string()) 7712 7713 return self.expression( 7714 exp.AnalyzeHistogram, 7715 this=this, 7716 expressions=expressions, 7717 expression=expression, 7718 update_options=update_options, 7719 ) 7720 7721 def _parse_merge(self) -> exp.Merge: 7722 self._match(TokenType.INTO) 7723 target = self._parse_table() 7724 7725 if target and self._match(TokenType.ALIAS, advance=False): 7726 target.set("alias", self._parse_table_alias()) 7727 7728 self._match(TokenType.USING) 7729 using = self._parse_table() 7730 7731 self._match(TokenType.ON) 7732 on = self._parse_assignment() 7733 7734 return self.expression( 7735 exp.Merge, 7736 this=target, 7737 using=using, 7738 on=on, 7739 whens=self._parse_when_matched(), 7740 returning=self._parse_returning(), 7741 ) 7742 7743 def _parse_when_matched(self) -> exp.Whens: 7744 whens = [] 7745 7746 while self._match(TokenType.WHEN): 7747 matched = not self._match(TokenType.NOT) 7748 self._match_text_seq("MATCHED") 7749 source = ( 7750 False 7751 if self._match_text_seq("BY", "TARGET") 7752 else self._match_text_seq("BY", "SOURCE") 7753 ) 7754 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7755 7756 self._match(TokenType.THEN) 7757 7758 if self._match(TokenType.INSERT): 7759 this = self._parse_star() 7760 if this: 7761 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7762 else: 7763 then = self.expression( 7764 exp.Insert, 7765 this=exp.var("ROW") 7766 if self._match_text_seq("ROW") 7767 else self._parse_value(values=False), 7768 expression=self._match_text_seq("VALUES") and self._parse_value(), 7769 ) 7770 elif self._match(TokenType.UPDATE): 7771 expressions = self._parse_star() 7772 if expressions: 7773 then = self.expression(exp.Update, expressions=expressions) 7774 else: 7775 then = self.expression( 7776 exp.Update, 7777 expressions=self._match(TokenType.SET) 7778 and self._parse_csv(self._parse_equality), 7779 ) 7780 elif self._match(TokenType.DELETE): 7781 then = self.expression(exp.Var, this=self._prev.text) 7782 else: 7783 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7784 7785 whens.append( 7786 self.expression( 7787 exp.When, 7788 matched=matched, 7789 source=source, 7790 condition=condition, 7791 then=then, 7792 ) 7793 ) 7794 return self.expression(exp.Whens, expressions=whens) 7795 7796 def _parse_show(self) -> t.Optional[exp.Expression]: 7797 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7798 if parser: 7799 return parser(self) 7800 return self._parse_as_command(self._prev) 7801 7802 def _parse_set_item_assignment( 7803 self, kind: t.Optional[str] = None 7804 ) -> t.Optional[exp.Expression]: 7805 index = self._index 7806 7807 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7808 return self._parse_set_transaction(global_=kind == "GLOBAL") 7809 7810 left = self._parse_primary() or self._parse_column() 7811 assignment_delimiter = self._match_texts(("=", "TO")) 7812 7813 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7814 self._retreat(index) 7815 return None 7816 7817 right = self._parse_statement() or self._parse_id_var() 7818 if isinstance(right, (exp.Column, exp.Identifier)): 7819 right = exp.var(right.name) 7820 7821 this = self.expression(exp.EQ, this=left, expression=right) 7822 return self.expression(exp.SetItem, this=this, kind=kind) 7823 7824 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7825 self._match_text_seq("TRANSACTION") 7826 characteristics = self._parse_csv( 7827 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7828 ) 7829 return self.expression( 7830 exp.SetItem, 7831 expressions=characteristics, 7832 kind="TRANSACTION", 7833 **{"global": global_}, # type: ignore 7834 ) 7835 7836 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7837 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7838 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7839 7840 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7841 index = self._index 7842 set_ = self.expression( 7843 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7844 ) 7845 7846 if self._curr: 7847 self._retreat(index) 7848 return self._parse_as_command(self._prev) 7849 7850 return set_ 7851 7852 def _parse_var_from_options( 7853 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7854 ) -> t.Optional[exp.Var]: 7855 start = self._curr 7856 if not start: 7857 return None 7858 7859 option = start.text.upper() 7860 continuations = options.get(option) 7861 7862 index = self._index 7863 self._advance() 7864 for keywords in continuations or []: 7865 if isinstance(keywords, str): 7866 keywords = (keywords,) 7867 7868 if self._match_text_seq(*keywords): 7869 option = f"{option} {' '.join(keywords)}" 7870 break 7871 else: 7872 if continuations or continuations is None: 7873 if raise_unmatched: 7874 self.raise_error(f"Unknown option {option}") 7875 7876 self._retreat(index) 7877 return None 7878 7879 return exp.var(option) 7880 7881 def _parse_as_command(self, start: Token) -> exp.Command: 7882 while self._curr: 7883 self._advance() 7884 text = self._find_sql(start, self._prev) 7885 size = len(start.text) 7886 self._warn_unsupported() 7887 return exp.Command(this=text[:size], expression=text[size:]) 7888 7889 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7890 settings = [] 7891 7892 self._match_l_paren() 7893 kind = self._parse_id_var() 7894 7895 if self._match(TokenType.L_PAREN): 7896 while True: 7897 key = self._parse_id_var() 7898 value = self._parse_primary() 7899 if not key and value is None: 7900 break 7901 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7902 self._match(TokenType.R_PAREN) 7903 7904 self._match_r_paren() 7905 7906 return self.expression( 7907 exp.DictProperty, 7908 this=this, 7909 kind=kind.this if kind else None, 7910 settings=settings, 7911 ) 7912 7913 def _parse_dict_range(self, this: str) -> exp.DictRange: 7914 self._match_l_paren() 7915 has_min = self._match_text_seq("MIN") 7916 if has_min: 7917 min = self._parse_var() or self._parse_primary() 7918 self._match_text_seq("MAX") 7919 max = self._parse_var() or self._parse_primary() 7920 else: 7921 max = self._parse_var() or self._parse_primary() 7922 min = exp.Literal.number(0) 7923 self._match_r_paren() 7924 return self.expression(exp.DictRange, this=this, min=min, max=max) 7925 7926 def _parse_comprehension( 7927 self, this: t.Optional[exp.Expression] 7928 ) -> t.Optional[exp.Comprehension]: 7929 index = self._index 7930 expression = self._parse_column() 7931 if not self._match(TokenType.IN): 7932 self._retreat(index - 1) 7933 return None 7934 iterator = self._parse_column() 7935 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7936 return self.expression( 7937 exp.Comprehension, 7938 this=this, 7939 expression=expression, 7940 iterator=iterator, 7941 condition=condition, 7942 ) 7943 7944 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7945 if self._match(TokenType.HEREDOC_STRING): 7946 return self.expression(exp.Heredoc, this=self._prev.text) 7947 7948 if not self._match_text_seq("$"): 7949 return None 7950 7951 tags = ["$"] 7952 tag_text = None 7953 7954 if self._is_connected(): 7955 self._advance() 7956 tags.append(self._prev.text.upper()) 7957 else: 7958 self.raise_error("No closing $ found") 7959 7960 if tags[-1] != "$": 7961 if self._is_connected() and self._match_text_seq("$"): 7962 tag_text = tags[-1] 7963 tags.append("$") 7964 else: 7965 self.raise_error("No closing $ found") 7966 7967 heredoc_start = self._curr 7968 7969 while self._curr: 7970 if self._match_text_seq(*tags, advance=False): 7971 this = self._find_sql(heredoc_start, self._prev) 7972 self._advance(len(tags)) 7973 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7974 7975 self._advance() 7976 7977 self.raise_error(f"No closing {''.join(tags)} found") 7978 return None 7979 7980 def _find_parser( 7981 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7982 ) -> t.Optional[t.Callable]: 7983 if not self._curr: 7984 return None 7985 7986 index = self._index 7987 this = [] 7988 while True: 7989 # The current token might be multiple words 7990 curr = self._curr.text.upper() 7991 key = curr.split(" ") 7992 this.append(curr) 7993 7994 self._advance() 7995 result, trie = in_trie(trie, key) 7996 if result == TrieResult.FAILED: 7997 break 7998 7999 if result == TrieResult.EXISTS: 8000 subparser = parsers[" ".join(this)] 8001 return subparser 8002 8003 self._retreat(index) 8004 return None 8005 8006 def _match(self, token_type, advance=True, expression=None): 8007 if not self._curr: 8008 return None 8009 8010 if self._curr.token_type == token_type: 8011 if advance: 8012 self._advance() 8013 self._add_comments(expression) 8014 return True 8015 8016 return None 8017 8018 def _match_set(self, types, advance=True): 8019 if not self._curr: 8020 return None 8021 8022 if self._curr.token_type in types: 8023 if advance: 8024 self._advance() 8025 return True 8026 8027 return None 8028 8029 def _match_pair(self, token_type_a, token_type_b, advance=True): 8030 if not self._curr or not self._next: 8031 return None 8032 8033 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 8034 if advance: 8035 self._advance(2) 8036 return True 8037 8038 return None 8039 8040 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8041 if not self._match(TokenType.L_PAREN, expression=expression): 8042 self.raise_error("Expecting (") 8043 8044 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8045 if not self._match(TokenType.R_PAREN, expression=expression): 8046 self.raise_error("Expecting )") 8047 8048 def _match_texts(self, texts, advance=True): 8049 if ( 8050 self._curr 8051 and self._curr.token_type != TokenType.STRING 8052 and self._curr.text.upper() in texts 8053 ): 8054 if advance: 8055 self._advance() 8056 return True 8057 return None 8058 8059 def _match_text_seq(self, *texts, advance=True): 8060 index = self._index 8061 for text in texts: 8062 if ( 8063 self._curr 8064 and self._curr.token_type != TokenType.STRING 8065 and self._curr.text.upper() == text 8066 ): 8067 self._advance() 8068 else: 8069 self._retreat(index) 8070 return None 8071 8072 if not advance: 8073 self._retreat(index) 8074 8075 return True 8076 8077 def _replace_lambda( 8078 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8079 ) -> t.Optional[exp.Expression]: 8080 if not node: 8081 return node 8082 8083 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8084 8085 for column in node.find_all(exp.Column): 8086 typ = lambda_types.get(column.parts[0].name) 8087 if typ is not None: 8088 dot_or_id = column.to_dot() if column.table else column.this 8089 8090 if typ: 8091 dot_or_id = self.expression( 8092 exp.Cast, 8093 this=dot_or_id, 8094 to=typ, 8095 ) 8096 8097 parent = column.parent 8098 8099 while isinstance(parent, exp.Dot): 8100 if not isinstance(parent.parent, exp.Dot): 8101 parent.replace(dot_or_id) 8102 break 8103 parent = parent.parent 8104 else: 8105 if column is node: 8106 node = dot_or_id 8107 else: 8108 column.replace(dot_or_id) 8109 return node 8110 8111 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8112 start = self._prev 8113 8114 # Not to be confused with TRUNCATE(number, decimals) function call 8115 if self._match(TokenType.L_PAREN): 8116 self._retreat(self._index - 2) 8117 return self._parse_function() 8118 8119 # Clickhouse supports TRUNCATE DATABASE as well 8120 is_database = self._match(TokenType.DATABASE) 8121 8122 self._match(TokenType.TABLE) 8123 8124 exists = self._parse_exists(not_=False) 8125 8126 expressions = self._parse_csv( 8127 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8128 ) 8129 8130 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8131 8132 if self._match_text_seq("RESTART", "IDENTITY"): 8133 identity = "RESTART" 8134 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8135 identity = "CONTINUE" 8136 else: 8137 identity = None 8138 8139 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8140 option = self._prev.text 8141 else: 8142 option = None 8143 8144 partition = self._parse_partition() 8145 8146 # Fallback case 8147 if self._curr: 8148 return self._parse_as_command(start) 8149 8150 return self.expression( 8151 exp.TruncateTable, 8152 expressions=expressions, 8153 is_database=is_database, 8154 exists=exists, 8155 cluster=cluster, 8156 identity=identity, 8157 option=option, 8158 partition=partition, 8159 ) 8160 8161 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8162 this = self._parse_ordered(self._parse_opclass) 8163 8164 if not self._match(TokenType.WITH): 8165 return this 8166 8167 op = self._parse_var(any_token=True) 8168 8169 return self.expression(exp.WithOperator, this=this, op=op) 8170 8171 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8172 self._match(TokenType.EQ) 8173 self._match(TokenType.L_PAREN) 8174 8175 opts: t.List[t.Optional[exp.Expression]] = [] 8176 option: exp.Expression | None 8177 while self._curr and not self._match(TokenType.R_PAREN): 8178 if self._match_text_seq("FORMAT_NAME", "="): 8179 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8180 option = self._parse_format_name() 8181 else: 8182 option = self._parse_property() 8183 8184 if option is None: 8185 self.raise_error("Unable to parse option") 8186 break 8187 8188 opts.append(option) 8189 8190 return opts 8191 8192 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8193 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8194 8195 options = [] 8196 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8197 option = self._parse_var(any_token=True) 8198 prev = self._prev.text.upper() 8199 8200 # Different dialects might separate options and values by white space, "=" and "AS" 8201 self._match(TokenType.EQ) 8202 self._match(TokenType.ALIAS) 8203 8204 param = self.expression(exp.CopyParameter, this=option) 8205 8206 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8207 TokenType.L_PAREN, advance=False 8208 ): 8209 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8210 param.set("expressions", self._parse_wrapped_options()) 8211 elif prev == "FILE_FORMAT": 8212 # T-SQL's external file format case 8213 param.set("expression", self._parse_field()) 8214 else: 8215 param.set("expression", self._parse_unquoted_field()) 8216 8217 options.append(param) 8218 self._match(sep) 8219 8220 return options 8221 8222 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8223 expr = self.expression(exp.Credentials) 8224 8225 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8226 expr.set("storage", self._parse_field()) 8227 if self._match_text_seq("CREDENTIALS"): 8228 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8229 creds = ( 8230 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8231 ) 8232 expr.set("credentials", creds) 8233 if self._match_text_seq("ENCRYPTION"): 8234 expr.set("encryption", self._parse_wrapped_options()) 8235 if self._match_text_seq("IAM_ROLE"): 8236 expr.set("iam_role", self._parse_field()) 8237 if self._match_text_seq("REGION"): 8238 expr.set("region", self._parse_field()) 8239 8240 return expr 8241 8242 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8243 return self._parse_field() 8244 8245 def _parse_copy(self) -> exp.Copy | exp.Command: 8246 start = self._prev 8247 8248 self._match(TokenType.INTO) 8249 8250 this = ( 8251 self._parse_select(nested=True, parse_subquery_alias=False) 8252 if self._match(TokenType.L_PAREN, advance=False) 8253 else self._parse_table(schema=True) 8254 ) 8255 8256 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8257 8258 files = self._parse_csv(self._parse_file_location) 8259 credentials = self._parse_credentials() 8260 8261 self._match_text_seq("WITH") 8262 8263 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8264 8265 # Fallback case 8266 if self._curr: 8267 return self._parse_as_command(start) 8268 8269 return self.expression( 8270 exp.Copy, 8271 this=this, 8272 kind=kind, 8273 credentials=credentials, 8274 files=files, 8275 params=params, 8276 ) 8277 8278 def _parse_normalize(self) -> exp.Normalize: 8279 return self.expression( 8280 exp.Normalize, 8281 this=self._parse_bitwise(), 8282 form=self._match(TokenType.COMMA) and self._parse_var(), 8283 ) 8284 8285 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8286 args = self._parse_csv(lambda: self._parse_lambda()) 8287 8288 this = seq_get(args, 0) 8289 decimals = seq_get(args, 1) 8290 8291 return expr_type( 8292 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8293 ) 8294 8295 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8296 star_token = self._prev 8297 8298 if self._match_text_seq("COLUMNS", "(", advance=False): 8299 this = self._parse_function() 8300 if isinstance(this, exp.Columns): 8301 this.set("unpack", True) 8302 return this 8303 8304 return self.expression( 8305 exp.Star, 8306 **{ # type: ignore 8307 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8308 "replace": self._parse_star_op("REPLACE"), 8309 "rename": self._parse_star_op("RENAME"), 8310 }, 8311 ).update_positions(star_token) 8312 8313 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8314 privilege_parts = [] 8315 8316 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8317 # (end of privilege list) or L_PAREN (start of column list) are met 8318 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8319 privilege_parts.append(self._curr.text.upper()) 8320 self._advance() 8321 8322 this = exp.var(" ".join(privilege_parts)) 8323 expressions = ( 8324 self._parse_wrapped_csv(self._parse_column) 8325 if self._match(TokenType.L_PAREN, advance=False) 8326 else None 8327 ) 8328 8329 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8330 8331 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8332 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8333 principal = self._parse_id_var() 8334 8335 if not principal: 8336 return None 8337 8338 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8339 8340 def _parse_grant(self) -> exp.Grant | exp.Command: 8341 start = self._prev 8342 8343 privileges = self._parse_csv(self._parse_grant_privilege) 8344 8345 self._match(TokenType.ON) 8346 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8347 8348 # Attempt to parse the securable e.g. MySQL allows names 8349 # such as "foo.*", "*.*" which are not easily parseable yet 8350 securable = self._try_parse(self._parse_table_parts) 8351 8352 if not securable or not self._match_text_seq("TO"): 8353 return self._parse_as_command(start) 8354 8355 principals = self._parse_csv(self._parse_grant_principal) 8356 8357 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8358 8359 if self._curr: 8360 return self._parse_as_command(start) 8361 8362 return self.expression( 8363 exp.Grant, 8364 privileges=privileges, 8365 kind=kind, 8366 securable=securable, 8367 principals=principals, 8368 grant_option=grant_option, 8369 ) 8370 8371 def _parse_overlay(self) -> exp.Overlay: 8372 return self.expression( 8373 exp.Overlay, 8374 **{ # type: ignore 8375 "this": self._parse_bitwise(), 8376 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8377 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8378 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8379 }, 8380 ) 8381 8382 def _parse_format_name(self) -> exp.Property: 8383 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8384 # for FILE_FORMAT = <format_name> 8385 return self.expression( 8386 exp.Property, 8387 this=exp.var("FORMAT_NAME"), 8388 value=self._parse_string() or self._parse_table_parts(), 8389 ) 8390 8391 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8392 args: t.List[exp.Expression] = [] 8393 8394 if self._match(TokenType.DISTINCT): 8395 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8396 self._match(TokenType.COMMA) 8397 8398 args.extend(self._parse_csv(self._parse_assignment)) 8399 8400 return self.expression( 8401 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8402 ) 8403 8404 def _identifier_expression( 8405 self, token: t.Optional[Token] = None, **kwargs: t.Any 8406 ) -> exp.Identifier: 8407 token = token or self._prev 8408 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8409 expression.update_positions(token) 8410 return expression
28def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 29 if len(args) == 1 and args[0].is_star: 30 return exp.StarMap(this=args[0]) 31 32 keys = [] 33 values = [] 34 for i in range(0, len(args), 2): 35 keys.append(args[i]) 36 values.append(args[i + 1]) 37 38 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
46def binary_range_parser( 47 expr_type: t.Type[exp.Expression], reverse_args: bool = False 48) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 49 def _parse_binary_range( 50 self: Parser, this: t.Optional[exp.Expression] 51 ) -> t.Optional[exp.Expression]: 52 expression = self._parse_bitwise() 53 if reverse_args: 54 this, expression = expression, this 55 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 56 57 return _parse_binary_range
60def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 61 # Default argument order is base, expression 62 this = seq_get(args, 0) 63 expression = seq_get(args, 1) 64 65 if expression: 66 if not dialect.LOG_BASE_FIRST: 67 this, expression = expression, this 68 return exp.Log(this=this, expression=expression) 69 70 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
90def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 91 def _builder(args: t.List, dialect: Dialect) -> E: 92 expression = expr_type( 93 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 94 ) 95 if len(args) > 2 and expr_type is exp.JSONExtract: 96 expression.set("expressions", args[2:]) 97 98 return expression 99 100 return _builder
103def build_mod(args: t.List) -> exp.Mod: 104 this = seq_get(args, 0) 105 expression = seq_get(args, 1) 106 107 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 108 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 109 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 110 111 return exp.Mod(this=this, expression=expression)
123def build_array_constructor( 124 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 125) -> exp.Expression: 126 array_exp = exp_class(expressions=args) 127 128 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 129 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 130 131 return array_exp
134def build_convert_timezone( 135 args: t.List, default_source_tz: t.Optional[str] = None 136) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 137 if len(args) == 2: 138 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 139 return exp.ConvertTimezone( 140 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 141 ) 142 143 return exp.ConvertTimezone.from_arg_list(args)
178class Parser(metaclass=_Parser): 179 """ 180 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 181 182 Args: 183 error_level: The desired error level. 184 Default: ErrorLevel.IMMEDIATE 185 error_message_context: The amount of context to capture from a query string when displaying 186 the error message (in number of characters). 187 Default: 100 188 max_errors: Maximum number of error messages to include in a raised ParseError. 189 This is only relevant if error_level is ErrorLevel.RAISE. 190 Default: 3 191 """ 192 193 FUNCTIONS: t.Dict[str, t.Callable] = { 194 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 195 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 196 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 197 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 198 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 199 ), 200 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 201 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 202 ), 203 "CHAR": lambda args: exp.Chr(expressions=args), 204 "CHR": lambda args: exp.Chr(expressions=args), 205 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 206 "CONCAT": lambda args, dialect: exp.Concat( 207 expressions=args, 208 safe=not dialect.STRICT_STRING_CONCAT, 209 coalesce=dialect.CONCAT_COALESCE, 210 ), 211 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 212 expressions=args, 213 safe=not dialect.STRICT_STRING_CONCAT, 214 coalesce=dialect.CONCAT_COALESCE, 215 ), 216 "CONVERT_TIMEZONE": build_convert_timezone, 217 "DATE_TO_DATE_STR": lambda args: exp.Cast( 218 this=seq_get(args, 0), 219 to=exp.DataType(this=exp.DataType.Type.TEXT), 220 ), 221 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 222 start=seq_get(args, 0), 223 end=seq_get(args, 1), 224 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 225 ), 226 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 227 "HEX": build_hex, 228 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 229 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 230 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 231 "LIKE": build_like, 232 "LOG": build_logarithm, 233 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 234 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 235 "LOWER": build_lower, 236 "LPAD": lambda args: build_pad(args), 237 "LEFTPAD": lambda args: build_pad(args), 238 "LTRIM": lambda args: build_trim(args), 239 "MOD": build_mod, 240 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 241 "RPAD": lambda args: build_pad(args, is_left=False), 242 "RTRIM": lambda args: build_trim(args, is_left=False), 243 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 244 if len(args) != 2 245 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 246 "STRPOS": exp.StrPosition.from_arg_list, 247 "CHARINDEX": lambda args: build_locate_strposition(args), 248 "INSTR": exp.StrPosition.from_arg_list, 249 "LOCATE": lambda args: build_locate_strposition(args), 250 "TIME_TO_TIME_STR": lambda args: exp.Cast( 251 this=seq_get(args, 0), 252 to=exp.DataType(this=exp.DataType.Type.TEXT), 253 ), 254 "TO_HEX": build_hex, 255 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 256 this=exp.Cast( 257 this=seq_get(args, 0), 258 to=exp.DataType(this=exp.DataType.Type.TEXT), 259 ), 260 start=exp.Literal.number(1), 261 length=exp.Literal.number(10), 262 ), 263 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 264 "UPPER": build_upper, 265 "VAR_MAP": build_var_map, 266 } 267 268 NO_PAREN_FUNCTIONS = { 269 TokenType.CURRENT_DATE: exp.CurrentDate, 270 TokenType.CURRENT_DATETIME: exp.CurrentDate, 271 TokenType.CURRENT_TIME: exp.CurrentTime, 272 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 273 TokenType.CURRENT_USER: exp.CurrentUser, 274 } 275 276 STRUCT_TYPE_TOKENS = { 277 TokenType.NESTED, 278 TokenType.OBJECT, 279 TokenType.STRUCT, 280 TokenType.UNION, 281 } 282 283 NESTED_TYPE_TOKENS = { 284 TokenType.ARRAY, 285 TokenType.LIST, 286 TokenType.LOWCARDINALITY, 287 TokenType.MAP, 288 TokenType.NULLABLE, 289 TokenType.RANGE, 290 *STRUCT_TYPE_TOKENS, 291 } 292 293 ENUM_TYPE_TOKENS = { 294 TokenType.DYNAMIC, 295 TokenType.ENUM, 296 TokenType.ENUM8, 297 TokenType.ENUM16, 298 } 299 300 AGGREGATE_TYPE_TOKENS = { 301 TokenType.AGGREGATEFUNCTION, 302 TokenType.SIMPLEAGGREGATEFUNCTION, 303 } 304 305 TYPE_TOKENS = { 306 TokenType.BIT, 307 TokenType.BOOLEAN, 308 TokenType.TINYINT, 309 TokenType.UTINYINT, 310 TokenType.SMALLINT, 311 TokenType.USMALLINT, 312 TokenType.INT, 313 TokenType.UINT, 314 TokenType.BIGINT, 315 TokenType.UBIGINT, 316 TokenType.INT128, 317 TokenType.UINT128, 318 TokenType.INT256, 319 TokenType.UINT256, 320 TokenType.MEDIUMINT, 321 TokenType.UMEDIUMINT, 322 TokenType.FIXEDSTRING, 323 TokenType.FLOAT, 324 TokenType.DOUBLE, 325 TokenType.UDOUBLE, 326 TokenType.CHAR, 327 TokenType.NCHAR, 328 TokenType.VARCHAR, 329 TokenType.NVARCHAR, 330 TokenType.BPCHAR, 331 TokenType.TEXT, 332 TokenType.MEDIUMTEXT, 333 TokenType.LONGTEXT, 334 TokenType.BLOB, 335 TokenType.MEDIUMBLOB, 336 TokenType.LONGBLOB, 337 TokenType.BINARY, 338 TokenType.VARBINARY, 339 TokenType.JSON, 340 TokenType.JSONB, 341 TokenType.INTERVAL, 342 TokenType.TINYBLOB, 343 TokenType.TINYTEXT, 344 TokenType.TIME, 345 TokenType.TIMETZ, 346 TokenType.TIMESTAMP, 347 TokenType.TIMESTAMP_S, 348 TokenType.TIMESTAMP_MS, 349 TokenType.TIMESTAMP_NS, 350 TokenType.TIMESTAMPTZ, 351 TokenType.TIMESTAMPLTZ, 352 TokenType.TIMESTAMPNTZ, 353 TokenType.DATETIME, 354 TokenType.DATETIME2, 355 TokenType.DATETIME64, 356 TokenType.SMALLDATETIME, 357 TokenType.DATE, 358 TokenType.DATE32, 359 TokenType.INT4RANGE, 360 TokenType.INT4MULTIRANGE, 361 TokenType.INT8RANGE, 362 TokenType.INT8MULTIRANGE, 363 TokenType.NUMRANGE, 364 TokenType.NUMMULTIRANGE, 365 TokenType.TSRANGE, 366 TokenType.TSMULTIRANGE, 367 TokenType.TSTZRANGE, 368 TokenType.TSTZMULTIRANGE, 369 TokenType.DATERANGE, 370 TokenType.DATEMULTIRANGE, 371 TokenType.DECIMAL, 372 TokenType.DECIMAL32, 373 TokenType.DECIMAL64, 374 TokenType.DECIMAL128, 375 TokenType.DECIMAL256, 376 TokenType.UDECIMAL, 377 TokenType.BIGDECIMAL, 378 TokenType.UUID, 379 TokenType.GEOGRAPHY, 380 TokenType.GEOMETRY, 381 TokenType.POINT, 382 TokenType.RING, 383 TokenType.LINESTRING, 384 TokenType.MULTILINESTRING, 385 TokenType.POLYGON, 386 TokenType.MULTIPOLYGON, 387 TokenType.HLLSKETCH, 388 TokenType.HSTORE, 389 TokenType.PSEUDO_TYPE, 390 TokenType.SUPER, 391 TokenType.SERIAL, 392 TokenType.SMALLSERIAL, 393 TokenType.BIGSERIAL, 394 TokenType.XML, 395 TokenType.YEAR, 396 TokenType.USERDEFINED, 397 TokenType.MONEY, 398 TokenType.SMALLMONEY, 399 TokenType.ROWVERSION, 400 TokenType.IMAGE, 401 TokenType.VARIANT, 402 TokenType.VECTOR, 403 TokenType.VOID, 404 TokenType.OBJECT, 405 TokenType.OBJECT_IDENTIFIER, 406 TokenType.INET, 407 TokenType.IPADDRESS, 408 TokenType.IPPREFIX, 409 TokenType.IPV4, 410 TokenType.IPV6, 411 TokenType.UNKNOWN, 412 TokenType.NOTHING, 413 TokenType.NULL, 414 TokenType.NAME, 415 TokenType.TDIGEST, 416 TokenType.DYNAMIC, 417 *ENUM_TYPE_TOKENS, 418 *NESTED_TYPE_TOKENS, 419 *AGGREGATE_TYPE_TOKENS, 420 } 421 422 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 423 TokenType.BIGINT: TokenType.UBIGINT, 424 TokenType.INT: TokenType.UINT, 425 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 426 TokenType.SMALLINT: TokenType.USMALLINT, 427 TokenType.TINYINT: TokenType.UTINYINT, 428 TokenType.DECIMAL: TokenType.UDECIMAL, 429 TokenType.DOUBLE: TokenType.UDOUBLE, 430 } 431 432 SUBQUERY_PREDICATES = { 433 TokenType.ANY: exp.Any, 434 TokenType.ALL: exp.All, 435 TokenType.EXISTS: exp.Exists, 436 TokenType.SOME: exp.Any, 437 } 438 439 RESERVED_TOKENS = { 440 *Tokenizer.SINGLE_TOKENS.values(), 441 TokenType.SELECT, 442 } - {TokenType.IDENTIFIER} 443 444 DB_CREATABLES = { 445 TokenType.DATABASE, 446 TokenType.DICTIONARY, 447 TokenType.FILE_FORMAT, 448 TokenType.MODEL, 449 TokenType.NAMESPACE, 450 TokenType.SCHEMA, 451 TokenType.SEQUENCE, 452 TokenType.SINK, 453 TokenType.SOURCE, 454 TokenType.STAGE, 455 TokenType.STORAGE_INTEGRATION, 456 TokenType.STREAMLIT, 457 TokenType.TABLE, 458 TokenType.TAG, 459 TokenType.VIEW, 460 TokenType.WAREHOUSE, 461 } 462 463 CREATABLES = { 464 TokenType.COLUMN, 465 TokenType.CONSTRAINT, 466 TokenType.FOREIGN_KEY, 467 TokenType.FUNCTION, 468 TokenType.INDEX, 469 TokenType.PROCEDURE, 470 *DB_CREATABLES, 471 } 472 473 ALTERABLES = { 474 TokenType.INDEX, 475 TokenType.TABLE, 476 TokenType.VIEW, 477 } 478 479 # Tokens that can represent identifiers 480 ID_VAR_TOKENS = { 481 TokenType.ALL, 482 TokenType.ATTACH, 483 TokenType.VAR, 484 TokenType.ANTI, 485 TokenType.APPLY, 486 TokenType.ASC, 487 TokenType.ASOF, 488 TokenType.AUTO_INCREMENT, 489 TokenType.BEGIN, 490 TokenType.BPCHAR, 491 TokenType.CACHE, 492 TokenType.CASE, 493 TokenType.COLLATE, 494 TokenType.COMMAND, 495 TokenType.COMMENT, 496 TokenType.COMMIT, 497 TokenType.CONSTRAINT, 498 TokenType.COPY, 499 TokenType.CUBE, 500 TokenType.CURRENT_SCHEMA, 501 TokenType.DEFAULT, 502 TokenType.DELETE, 503 TokenType.DESC, 504 TokenType.DESCRIBE, 505 TokenType.DETACH, 506 TokenType.DICTIONARY, 507 TokenType.DIV, 508 TokenType.END, 509 TokenType.EXECUTE, 510 TokenType.EXPORT, 511 TokenType.ESCAPE, 512 TokenType.FALSE, 513 TokenType.FIRST, 514 TokenType.FILTER, 515 TokenType.FINAL, 516 TokenType.FORMAT, 517 TokenType.FULL, 518 TokenType.GET, 519 TokenType.IDENTIFIER, 520 TokenType.IS, 521 TokenType.ISNULL, 522 TokenType.INTERVAL, 523 TokenType.KEEP, 524 TokenType.KILL, 525 TokenType.LEFT, 526 TokenType.LIMIT, 527 TokenType.LOAD, 528 TokenType.MERGE, 529 TokenType.NATURAL, 530 TokenType.NEXT, 531 TokenType.OFFSET, 532 TokenType.OPERATOR, 533 TokenType.ORDINALITY, 534 TokenType.OVERLAPS, 535 TokenType.OVERWRITE, 536 TokenType.PARTITION, 537 TokenType.PERCENT, 538 TokenType.PIVOT, 539 TokenType.PRAGMA, 540 TokenType.PUT, 541 TokenType.RANGE, 542 TokenType.RECURSIVE, 543 TokenType.REFERENCES, 544 TokenType.REFRESH, 545 TokenType.RENAME, 546 TokenType.REPLACE, 547 TokenType.RIGHT, 548 TokenType.ROLLUP, 549 TokenType.ROW, 550 TokenType.ROWS, 551 TokenType.SEMI, 552 TokenType.SET, 553 TokenType.SETTINGS, 554 TokenType.SHOW, 555 TokenType.TEMPORARY, 556 TokenType.TOP, 557 TokenType.TRUE, 558 TokenType.TRUNCATE, 559 TokenType.UNIQUE, 560 TokenType.UNNEST, 561 TokenType.UNPIVOT, 562 TokenType.UPDATE, 563 TokenType.USE, 564 TokenType.VOLATILE, 565 TokenType.WINDOW, 566 *CREATABLES, 567 *SUBQUERY_PREDICATES, 568 *TYPE_TOKENS, 569 *NO_PAREN_FUNCTIONS, 570 } 571 ID_VAR_TOKENS.remove(TokenType.UNION) 572 573 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 574 TokenType.ANTI, 575 TokenType.APPLY, 576 TokenType.ASOF, 577 TokenType.FULL, 578 TokenType.LEFT, 579 TokenType.LOCK, 580 TokenType.NATURAL, 581 TokenType.RIGHT, 582 TokenType.SEMI, 583 TokenType.WINDOW, 584 } 585 586 ALIAS_TOKENS = ID_VAR_TOKENS 587 588 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 589 590 ARRAY_CONSTRUCTORS = { 591 "ARRAY": exp.Array, 592 "LIST": exp.List, 593 } 594 595 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 596 597 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 598 599 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 600 601 FUNC_TOKENS = { 602 TokenType.COLLATE, 603 TokenType.COMMAND, 604 TokenType.CURRENT_DATE, 605 TokenType.CURRENT_DATETIME, 606 TokenType.CURRENT_SCHEMA, 607 TokenType.CURRENT_TIMESTAMP, 608 TokenType.CURRENT_TIME, 609 TokenType.CURRENT_USER, 610 TokenType.FILTER, 611 TokenType.FIRST, 612 TokenType.FORMAT, 613 TokenType.GET, 614 TokenType.GLOB, 615 TokenType.IDENTIFIER, 616 TokenType.INDEX, 617 TokenType.ISNULL, 618 TokenType.ILIKE, 619 TokenType.INSERT, 620 TokenType.LIKE, 621 TokenType.MERGE, 622 TokenType.NEXT, 623 TokenType.OFFSET, 624 TokenType.PRIMARY_KEY, 625 TokenType.RANGE, 626 TokenType.REPLACE, 627 TokenType.RLIKE, 628 TokenType.ROW, 629 TokenType.UNNEST, 630 TokenType.VAR, 631 TokenType.LEFT, 632 TokenType.RIGHT, 633 TokenType.SEQUENCE, 634 TokenType.DATE, 635 TokenType.DATETIME, 636 TokenType.TABLE, 637 TokenType.TIMESTAMP, 638 TokenType.TIMESTAMPTZ, 639 TokenType.TRUNCATE, 640 TokenType.WINDOW, 641 TokenType.XOR, 642 *TYPE_TOKENS, 643 *SUBQUERY_PREDICATES, 644 } 645 646 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 647 TokenType.AND: exp.And, 648 } 649 650 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 651 TokenType.COLON_EQ: exp.PropertyEQ, 652 } 653 654 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 655 TokenType.OR: exp.Or, 656 } 657 658 EQUALITY = { 659 TokenType.EQ: exp.EQ, 660 TokenType.NEQ: exp.NEQ, 661 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 662 } 663 664 COMPARISON = { 665 TokenType.GT: exp.GT, 666 TokenType.GTE: exp.GTE, 667 TokenType.LT: exp.LT, 668 TokenType.LTE: exp.LTE, 669 } 670 671 BITWISE = { 672 TokenType.AMP: exp.BitwiseAnd, 673 TokenType.CARET: exp.BitwiseXor, 674 TokenType.PIPE: exp.BitwiseOr, 675 } 676 677 TERM = { 678 TokenType.DASH: exp.Sub, 679 TokenType.PLUS: exp.Add, 680 TokenType.MOD: exp.Mod, 681 TokenType.COLLATE: exp.Collate, 682 } 683 684 FACTOR = { 685 TokenType.DIV: exp.IntDiv, 686 TokenType.LR_ARROW: exp.Distance, 687 TokenType.SLASH: exp.Div, 688 TokenType.STAR: exp.Mul, 689 } 690 691 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 692 693 TIMES = { 694 TokenType.TIME, 695 TokenType.TIMETZ, 696 } 697 698 TIMESTAMPS = { 699 TokenType.TIMESTAMP, 700 TokenType.TIMESTAMPNTZ, 701 TokenType.TIMESTAMPTZ, 702 TokenType.TIMESTAMPLTZ, 703 *TIMES, 704 } 705 706 SET_OPERATIONS = { 707 TokenType.UNION, 708 TokenType.INTERSECT, 709 TokenType.EXCEPT, 710 } 711 712 JOIN_METHODS = { 713 TokenType.ASOF, 714 TokenType.NATURAL, 715 TokenType.POSITIONAL, 716 } 717 718 JOIN_SIDES = { 719 TokenType.LEFT, 720 TokenType.RIGHT, 721 TokenType.FULL, 722 } 723 724 JOIN_KINDS = { 725 TokenType.ANTI, 726 TokenType.CROSS, 727 TokenType.INNER, 728 TokenType.OUTER, 729 TokenType.SEMI, 730 TokenType.STRAIGHT_JOIN, 731 } 732 733 JOIN_HINTS: t.Set[str] = set() 734 735 LAMBDAS = { 736 TokenType.ARROW: lambda self, expressions: self.expression( 737 exp.Lambda, 738 this=self._replace_lambda( 739 self._parse_assignment(), 740 expressions, 741 ), 742 expressions=expressions, 743 ), 744 TokenType.FARROW: lambda self, expressions: self.expression( 745 exp.Kwarg, 746 this=exp.var(expressions[0].name), 747 expression=self._parse_assignment(), 748 ), 749 } 750 751 COLUMN_OPERATORS = { 752 TokenType.DOT: None, 753 TokenType.DOTCOLON: lambda self, this, to: self.expression( 754 exp.JSONCast, 755 this=this, 756 to=to, 757 ), 758 TokenType.DCOLON: lambda self, this, to: self.expression( 759 exp.Cast if self.STRICT_CAST else exp.TryCast, 760 this=this, 761 to=to, 762 ), 763 TokenType.ARROW: lambda self, this, path: self.expression( 764 exp.JSONExtract, 765 this=this, 766 expression=self.dialect.to_json_path(path), 767 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 768 ), 769 TokenType.DARROW: lambda self, this, path: self.expression( 770 exp.JSONExtractScalar, 771 this=this, 772 expression=self.dialect.to_json_path(path), 773 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 774 ), 775 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 776 exp.JSONBExtract, 777 this=this, 778 expression=path, 779 ), 780 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 781 exp.JSONBExtractScalar, 782 this=this, 783 expression=path, 784 ), 785 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 786 exp.JSONBContains, 787 this=this, 788 expression=key, 789 ), 790 } 791 792 EXPRESSION_PARSERS = { 793 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 794 exp.Column: lambda self: self._parse_column(), 795 exp.Condition: lambda self: self._parse_assignment(), 796 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 797 exp.Expression: lambda self: self._parse_expression(), 798 exp.From: lambda self: self._parse_from(joins=True), 799 exp.Group: lambda self: self._parse_group(), 800 exp.Having: lambda self: self._parse_having(), 801 exp.Hint: lambda self: self._parse_hint_body(), 802 exp.Identifier: lambda self: self._parse_id_var(), 803 exp.Join: lambda self: self._parse_join(), 804 exp.Lambda: lambda self: self._parse_lambda(), 805 exp.Lateral: lambda self: self._parse_lateral(), 806 exp.Limit: lambda self: self._parse_limit(), 807 exp.Offset: lambda self: self._parse_offset(), 808 exp.Order: lambda self: self._parse_order(), 809 exp.Ordered: lambda self: self._parse_ordered(), 810 exp.Properties: lambda self: self._parse_properties(), 811 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 812 exp.Qualify: lambda self: self._parse_qualify(), 813 exp.Returning: lambda self: self._parse_returning(), 814 exp.Select: lambda self: self._parse_select(), 815 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 816 exp.Table: lambda self: self._parse_table_parts(), 817 exp.TableAlias: lambda self: self._parse_table_alias(), 818 exp.Tuple: lambda self: self._parse_value(values=False), 819 exp.Whens: lambda self: self._parse_when_matched(), 820 exp.Where: lambda self: self._parse_where(), 821 exp.Window: lambda self: self._parse_named_window(), 822 exp.With: lambda self: self._parse_with(), 823 "JOIN_TYPE": lambda self: self._parse_join_parts(), 824 } 825 826 STATEMENT_PARSERS = { 827 TokenType.ALTER: lambda self: self._parse_alter(), 828 TokenType.ANALYZE: lambda self: self._parse_analyze(), 829 TokenType.BEGIN: lambda self: self._parse_transaction(), 830 TokenType.CACHE: lambda self: self._parse_cache(), 831 TokenType.COMMENT: lambda self: self._parse_comment(), 832 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 833 TokenType.COPY: lambda self: self._parse_copy(), 834 TokenType.CREATE: lambda self: self._parse_create(), 835 TokenType.DELETE: lambda self: self._parse_delete(), 836 TokenType.DESC: lambda self: self._parse_describe(), 837 TokenType.DESCRIBE: lambda self: self._parse_describe(), 838 TokenType.DROP: lambda self: self._parse_drop(), 839 TokenType.GRANT: lambda self: self._parse_grant(), 840 TokenType.INSERT: lambda self: self._parse_insert(), 841 TokenType.KILL: lambda self: self._parse_kill(), 842 TokenType.LOAD: lambda self: self._parse_load(), 843 TokenType.MERGE: lambda self: self._parse_merge(), 844 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 845 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 846 TokenType.REFRESH: lambda self: self._parse_refresh(), 847 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 848 TokenType.SET: lambda self: self._parse_set(), 849 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 850 TokenType.UNCACHE: lambda self: self._parse_uncache(), 851 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 852 TokenType.UPDATE: lambda self: self._parse_update(), 853 TokenType.USE: lambda self: self._parse_use(), 854 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 855 } 856 857 UNARY_PARSERS = { 858 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 859 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 860 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 861 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 862 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 863 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 864 } 865 866 STRING_PARSERS = { 867 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 868 exp.RawString, this=token.text 869 ), 870 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 871 exp.National, this=token.text 872 ), 873 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 874 TokenType.STRING: lambda self, token: self.expression( 875 exp.Literal, this=token.text, is_string=True 876 ), 877 TokenType.UNICODE_STRING: lambda self, token: self.expression( 878 exp.UnicodeString, 879 this=token.text, 880 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 881 ), 882 } 883 884 NUMERIC_PARSERS = { 885 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 886 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 887 TokenType.HEX_STRING: lambda self, token: self.expression( 888 exp.HexString, 889 this=token.text, 890 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 891 ), 892 TokenType.NUMBER: lambda self, token: self.expression( 893 exp.Literal, this=token.text, is_string=False 894 ), 895 } 896 897 PRIMARY_PARSERS = { 898 **STRING_PARSERS, 899 **NUMERIC_PARSERS, 900 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 901 TokenType.NULL: lambda self, _: self.expression(exp.Null), 902 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 903 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 904 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 905 TokenType.STAR: lambda self, _: self._parse_star_ops(), 906 } 907 908 PLACEHOLDER_PARSERS = { 909 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 910 TokenType.PARAMETER: lambda self: self._parse_parameter(), 911 TokenType.COLON: lambda self: ( 912 self.expression(exp.Placeholder, this=self._prev.text) 913 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 914 else None 915 ), 916 } 917 918 RANGE_PARSERS = { 919 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 920 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 921 TokenType.GLOB: binary_range_parser(exp.Glob), 922 TokenType.ILIKE: binary_range_parser(exp.ILike), 923 TokenType.IN: lambda self, this: self._parse_in(this), 924 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 925 TokenType.IS: lambda self, this: self._parse_is(this), 926 TokenType.LIKE: binary_range_parser(exp.Like), 927 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 928 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 929 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 930 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 931 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 932 } 933 934 PIPE_SYNTAX_TRANSFORM_PARSERS = { 935 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 936 "WHERE": lambda self, query: self._parse_pipe_syntax_where(query), 937 "ORDER BY": lambda self, query: query.order_by(self._parse_order(), copy=False), 938 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 939 "OFFSET": lambda self, query: query.offset(self._parse_offset(), copy=False), 940 "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query), 941 } 942 943 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 944 "ALLOWED_VALUES": lambda self: self.expression( 945 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 946 ), 947 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 948 "AUTO": lambda self: self._parse_auto_property(), 949 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 950 "BACKUP": lambda self: self.expression( 951 exp.BackupProperty, this=self._parse_var(any_token=True) 952 ), 953 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 954 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 955 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 956 "CHECKSUM": lambda self: self._parse_checksum(), 957 "CLUSTER BY": lambda self: self._parse_cluster(), 958 "CLUSTERED": lambda self: self._parse_clustered_by(), 959 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 960 exp.CollateProperty, **kwargs 961 ), 962 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 963 "CONTAINS": lambda self: self._parse_contains_property(), 964 "COPY": lambda self: self._parse_copy_property(), 965 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 966 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 967 "DEFINER": lambda self: self._parse_definer(), 968 "DETERMINISTIC": lambda self: self.expression( 969 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 970 ), 971 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 972 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 973 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 974 "DISTKEY": lambda self: self._parse_distkey(), 975 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 976 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 977 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 978 "ENVIRONMENT": lambda self: self.expression( 979 exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 980 ), 981 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 982 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 983 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 984 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 985 "FREESPACE": lambda self: self._parse_freespace(), 986 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 987 "HEAP": lambda self: self.expression(exp.HeapProperty), 988 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 989 "IMMUTABLE": lambda self: self.expression( 990 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 991 ), 992 "INHERITS": lambda self: self.expression( 993 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 994 ), 995 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 996 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 997 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 998 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 999 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1000 "LIKE": lambda self: self._parse_create_like(), 1001 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1002 "LOCK": lambda self: self._parse_locking(), 1003 "LOCKING": lambda self: self._parse_locking(), 1004 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1005 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 1006 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1007 "MODIFIES": lambda self: self._parse_modifies_property(), 1008 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1009 "NO": lambda self: self._parse_no_property(), 1010 "ON": lambda self: self._parse_on_property(), 1011 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1012 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1013 "PARTITION": lambda self: self._parse_partitioned_of(), 1014 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1015 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1016 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1017 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1018 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1019 "READS": lambda self: self._parse_reads_property(), 1020 "REMOTE": lambda self: self._parse_remote_with_connection(), 1021 "RETURNS": lambda self: self._parse_returns(), 1022 "STRICT": lambda self: self.expression(exp.StrictProperty), 1023 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1024 "ROW": lambda self: self._parse_row(), 1025 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1026 "SAMPLE": lambda self: self.expression( 1027 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1028 ), 1029 "SECURE": lambda self: self.expression(exp.SecureProperty), 1030 "SECURITY": lambda self: self._parse_security(), 1031 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1032 "SETTINGS": lambda self: self._parse_settings_property(), 1033 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1034 "SORTKEY": lambda self: self._parse_sortkey(), 1035 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1036 "STABLE": lambda self: self.expression( 1037 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1038 ), 1039 "STORED": lambda self: self._parse_stored(), 1040 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1041 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1042 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1043 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1044 "TO": lambda self: self._parse_to_table(), 1045 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1046 "TRANSFORM": lambda self: self.expression( 1047 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1048 ), 1049 "TTL": lambda self: self._parse_ttl(), 1050 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1051 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1052 "VOLATILE": lambda self: self._parse_volatile_property(), 1053 "WITH": lambda self: self._parse_with_property(), 1054 } 1055 1056 CONSTRAINT_PARSERS = { 1057 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1058 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1059 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1060 "CHARACTER SET": lambda self: self.expression( 1061 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1062 ), 1063 "CHECK": lambda self: self.expression( 1064 exp.CheckColumnConstraint, 1065 this=self._parse_wrapped(self._parse_assignment), 1066 enforced=self._match_text_seq("ENFORCED"), 1067 ), 1068 "COLLATE": lambda self: self.expression( 1069 exp.CollateColumnConstraint, 1070 this=self._parse_identifier() or self._parse_column(), 1071 ), 1072 "COMMENT": lambda self: self.expression( 1073 exp.CommentColumnConstraint, this=self._parse_string() 1074 ), 1075 "COMPRESS": lambda self: self._parse_compress(), 1076 "CLUSTERED": lambda self: self.expression( 1077 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1078 ), 1079 "NONCLUSTERED": lambda self: self.expression( 1080 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1081 ), 1082 "DEFAULT": lambda self: self.expression( 1083 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1084 ), 1085 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1086 "EPHEMERAL": lambda self: self.expression( 1087 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1088 ), 1089 "EXCLUDE": lambda self: self.expression( 1090 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1091 ), 1092 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1093 "FORMAT": lambda self: self.expression( 1094 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1095 ), 1096 "GENERATED": lambda self: self._parse_generated_as_identity(), 1097 "IDENTITY": lambda self: self._parse_auto_increment(), 1098 "INLINE": lambda self: self._parse_inline(), 1099 "LIKE": lambda self: self._parse_create_like(), 1100 "NOT": lambda self: self._parse_not_constraint(), 1101 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1102 "ON": lambda self: ( 1103 self._match(TokenType.UPDATE) 1104 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1105 ) 1106 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1107 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1108 "PERIOD": lambda self: self._parse_period_for_system_time(), 1109 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1110 "REFERENCES": lambda self: self._parse_references(match=False), 1111 "TITLE": lambda self: self.expression( 1112 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1113 ), 1114 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1115 "UNIQUE": lambda self: self._parse_unique(), 1116 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1117 "WATERMARK": lambda self: self.expression( 1118 exp.WatermarkColumnConstraint, 1119 this=self._match(TokenType.FOR) and self._parse_column(), 1120 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1121 ), 1122 "WITH": lambda self: self.expression( 1123 exp.Properties, expressions=self._parse_wrapped_properties() 1124 ), 1125 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1126 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1127 } 1128 1129 def _parse_pipe_syntax_select(self, query: exp.Query) -> exp.Query: 1130 select = self._parse_select() 1131 if isinstance(select, exp.Select): 1132 return select.from_(query.subquery(copy=False), copy=False) 1133 return query 1134 1135 def _parse_pipe_syntax_where(self, query: exp.Query) -> exp.Query: 1136 where = self._parse_where() 1137 return query.where(where, copy=False) 1138 1139 def _parse_pipe_syntax_limit(self, query: exp.Query) -> exp.Query: 1140 limit = self._parse_limit() 1141 offset = self._parse_offset() 1142 if limit: 1143 query.limit(limit, copy=False) 1144 if offset: 1145 query.offset(offset, copy=False) 1146 return query 1147 1148 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 1149 this = self._parse_assignment() 1150 if self._match_text_seq("GROUP", "AND", advance=False): 1151 return this 1152 1153 this = self._parse_alias(this) 1154 1155 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 1156 return self._parse_ordered(lambda: this) 1157 1158 return this 1159 1160 def _parse_pipe_syntax_aggregate_group_order_by( 1161 self, query: exp.Query, group_by_exists: bool = True 1162 ) -> exp.Query: 1163 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 1164 aggregates_or_groups, orders = [], [] 1165 for element in expr: 1166 if isinstance(element, exp.Ordered): 1167 this = element.this 1168 if isinstance(this, exp.Alias): 1169 element.set("this", this.args["alias"]) 1170 orders.append(element) 1171 else: 1172 this = element 1173 aggregates_or_groups.append(this) 1174 1175 if group_by_exists and isinstance(query, exp.Select): 1176 query = query.select(*aggregates_or_groups, copy=False).group_by( 1177 *[element.args.get("alias", element) for element in aggregates_or_groups], 1178 copy=False, 1179 ) 1180 else: 1181 query = exp.select(*aggregates_or_groups, copy=False).from_( 1182 query.subquery(copy=False), copy=False 1183 ) 1184 1185 if orders: 1186 return query.order_by(*orders, copy=False) 1187 1188 return query 1189 1190 def _parse_pipe_syntax_aggregate(self, query: exp.Query) -> exp.Query: 1191 self._match_text_seq("AGGREGATE") 1192 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 1193 1194 if self._match(TokenType.GROUP_BY) or ( 1195 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 1196 ): 1197 return self._parse_pipe_syntax_aggregate_group_order_by(query) 1198 1199 return query 1200 1201 def _parse_pipe_syntax_set_operator( 1202 self, query: t.Optional[exp.Query] 1203 ) -> t.Optional[exp.Query]: 1204 first_setop = self.parse_set_operation(this=query) 1205 1206 if not first_setop or not query: 1207 return None 1208 1209 first_setop.this.pop() 1210 distinct = first_setop.args.pop("distinct") 1211 1212 setops = [first_setop.expression.pop(), *self._parse_expressions()] 1213 1214 if isinstance(first_setop, exp.Union): 1215 return query.union(*setops, distinct=distinct, **first_setop.args) 1216 if isinstance(first_setop, exp.Except): 1217 return query.except_(*setops, distinct=distinct, **first_setop.args) 1218 return query.intersect(*setops, distinct=distinct, **first_setop.args) 1219 1220 def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expression: 1221 klass = ( 1222 exp.PartitionedByBucket 1223 if self._prev.text.upper() == "BUCKET" 1224 else exp.PartitionByTruncate 1225 ) 1226 1227 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1228 this, expression = seq_get(args, 0), seq_get(args, 1) 1229 1230 if isinstance(this, exp.Literal): 1231 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1232 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1233 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1234 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1235 # 1236 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1237 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1238 this, expression = expression, this 1239 1240 return self.expression(klass, this=this, expression=expression) 1241 1242 ALTER_PARSERS = { 1243 "ADD": lambda self: self._parse_alter_table_add(), 1244 "AS": lambda self: self._parse_select(), 1245 "ALTER": lambda self: self._parse_alter_table_alter(), 1246 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1247 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1248 "DROP": lambda self: self._parse_alter_table_drop(), 1249 "RENAME": lambda self: self._parse_alter_table_rename(), 1250 "SET": lambda self: self._parse_alter_table_set(), 1251 "SWAP": lambda self: self.expression( 1252 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1253 ), 1254 } 1255 1256 ALTER_ALTER_PARSERS = { 1257 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1258 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1259 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1260 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1261 } 1262 1263 SCHEMA_UNNAMED_CONSTRAINTS = { 1264 "CHECK", 1265 "EXCLUDE", 1266 "FOREIGN KEY", 1267 "LIKE", 1268 "PERIOD", 1269 "PRIMARY KEY", 1270 "UNIQUE", 1271 "WATERMARK", 1272 "BUCKET", 1273 "TRUNCATE", 1274 } 1275 1276 NO_PAREN_FUNCTION_PARSERS = { 1277 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1278 "CASE": lambda self: self._parse_case(), 1279 "CONNECT_BY_ROOT": lambda self: self.expression( 1280 exp.ConnectByRoot, this=self._parse_column() 1281 ), 1282 "IF": lambda self: self._parse_if(), 1283 } 1284 1285 INVALID_FUNC_NAME_TOKENS = { 1286 TokenType.IDENTIFIER, 1287 TokenType.STRING, 1288 } 1289 1290 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1291 1292 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1293 1294 FUNCTION_PARSERS = { 1295 **{ 1296 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1297 }, 1298 **{ 1299 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1300 }, 1301 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1302 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1303 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1304 "DECODE": lambda self: self._parse_decode(), 1305 "EXTRACT": lambda self: self._parse_extract(), 1306 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1307 "GAP_FILL": lambda self: self._parse_gap_fill(), 1308 "JSON_OBJECT": lambda self: self._parse_json_object(), 1309 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1310 "JSON_TABLE": lambda self: self._parse_json_table(), 1311 "MATCH": lambda self: self._parse_match_against(), 1312 "NORMALIZE": lambda self: self._parse_normalize(), 1313 "OPENJSON": lambda self: self._parse_open_json(), 1314 "OVERLAY": lambda self: self._parse_overlay(), 1315 "POSITION": lambda self: self._parse_position(), 1316 "PREDICT": lambda self: self._parse_predict(), 1317 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1318 "STRING_AGG": lambda self: self._parse_string_agg(), 1319 "SUBSTRING": lambda self: self._parse_substring(), 1320 "TRIM": lambda self: self._parse_trim(), 1321 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1322 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1323 "XMLELEMENT": lambda self: self.expression( 1324 exp.XMLElement, 1325 this=self._match_text_seq("NAME") and self._parse_id_var(), 1326 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1327 ), 1328 "XMLTABLE": lambda self: self._parse_xml_table(), 1329 } 1330 1331 QUERY_MODIFIER_PARSERS = { 1332 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1333 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1334 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1335 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1336 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1337 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1338 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1339 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1340 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1341 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1342 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1343 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1344 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1345 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1346 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1347 TokenType.CLUSTER_BY: lambda self: ( 1348 "cluster", 1349 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1350 ), 1351 TokenType.DISTRIBUTE_BY: lambda self: ( 1352 "distribute", 1353 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1354 ), 1355 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1356 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1357 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1358 } 1359 1360 SET_PARSERS = { 1361 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1362 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1363 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1364 "TRANSACTION": lambda self: self._parse_set_transaction(), 1365 } 1366 1367 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1368 1369 TYPE_LITERAL_PARSERS = { 1370 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1371 } 1372 1373 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1374 1375 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1376 1377 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1378 1379 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1380 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1381 "ISOLATION": ( 1382 ("LEVEL", "REPEATABLE", "READ"), 1383 ("LEVEL", "READ", "COMMITTED"), 1384 ("LEVEL", "READ", "UNCOMITTED"), 1385 ("LEVEL", "SERIALIZABLE"), 1386 ), 1387 "READ": ("WRITE", "ONLY"), 1388 } 1389 1390 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1391 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1392 ) 1393 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1394 1395 CREATE_SEQUENCE: OPTIONS_TYPE = { 1396 "SCALE": ("EXTEND", "NOEXTEND"), 1397 "SHARD": ("EXTEND", "NOEXTEND"), 1398 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1399 **dict.fromkeys( 1400 ( 1401 "SESSION", 1402 "GLOBAL", 1403 "KEEP", 1404 "NOKEEP", 1405 "ORDER", 1406 "NOORDER", 1407 "NOCACHE", 1408 "CYCLE", 1409 "NOCYCLE", 1410 "NOMINVALUE", 1411 "NOMAXVALUE", 1412 "NOSCALE", 1413 "NOSHARD", 1414 ), 1415 tuple(), 1416 ), 1417 } 1418 1419 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1420 1421 USABLES: OPTIONS_TYPE = dict.fromkeys( 1422 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1423 ) 1424 1425 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1426 1427 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1428 "TYPE": ("EVOLUTION",), 1429 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1430 } 1431 1432 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1433 1434 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1435 1436 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1437 "NOT": ("ENFORCED",), 1438 "MATCH": ( 1439 "FULL", 1440 "PARTIAL", 1441 "SIMPLE", 1442 ), 1443 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1444 "USING": ( 1445 "BTREE", 1446 "HASH", 1447 ), 1448 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1449 } 1450 1451 WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = { 1452 "NO": ("OTHERS",), 1453 "CURRENT": ("ROW",), 1454 **dict.fromkeys(("GROUP", "TIES"), tuple()), 1455 } 1456 1457 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1458 1459 CLONE_KEYWORDS = {"CLONE", "COPY"} 1460 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1461 HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"} 1462 1463 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1464 1465 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1466 1467 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1468 1469 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1470 1471 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1472 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1473 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1474 1475 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1476 1477 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1478 1479 ADD_CONSTRAINT_TOKENS = { 1480 TokenType.CONSTRAINT, 1481 TokenType.FOREIGN_KEY, 1482 TokenType.INDEX, 1483 TokenType.KEY, 1484 TokenType.PRIMARY_KEY, 1485 TokenType.UNIQUE, 1486 } 1487 1488 DISTINCT_TOKENS = {TokenType.DISTINCT} 1489 1490 NULL_TOKENS = {TokenType.NULL} 1491 1492 UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1493 1494 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1495 1496 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1497 1498 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1499 1500 ODBC_DATETIME_LITERALS = { 1501 "d": exp.Date, 1502 "t": exp.Time, 1503 "ts": exp.Timestamp, 1504 } 1505 1506 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1507 1508 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1509 1510 # The style options for the DESCRIBE statement 1511 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1512 1513 # The style options for the ANALYZE statement 1514 ANALYZE_STYLES = { 1515 "BUFFER_USAGE_LIMIT", 1516 "FULL", 1517 "LOCAL", 1518 "NO_WRITE_TO_BINLOG", 1519 "SAMPLE", 1520 "SKIP_LOCKED", 1521 "VERBOSE", 1522 } 1523 1524 ANALYZE_EXPRESSION_PARSERS = { 1525 "ALL": lambda self: self._parse_analyze_columns(), 1526 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1527 "DELETE": lambda self: self._parse_analyze_delete(), 1528 "DROP": lambda self: self._parse_analyze_histogram(), 1529 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1530 "LIST": lambda self: self._parse_analyze_list(), 1531 "PREDICATE": lambda self: self._parse_analyze_columns(), 1532 "UPDATE": lambda self: self._parse_analyze_histogram(), 1533 "VALIDATE": lambda self: self._parse_analyze_validate(), 1534 } 1535 1536 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1537 1538 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1539 1540 OPERATION_MODIFIERS: t.Set[str] = set() 1541 1542 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1543 1544 MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows) 1545 1546 STRICT_CAST = True 1547 1548 PREFIXED_PIVOT_COLUMNS = False 1549 IDENTIFY_PIVOT_STRINGS = False 1550 1551 LOG_DEFAULTS_TO_LN = False 1552 1553 # Whether the table sample clause expects CSV syntax 1554 TABLESAMPLE_CSV = False 1555 1556 # The default method used for table sampling 1557 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1558 1559 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1560 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1561 1562 # Whether the TRIM function expects the characters to trim as its first argument 1563 TRIM_PATTERN_FIRST = False 1564 1565 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1566 STRING_ALIASES = False 1567 1568 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1569 MODIFIERS_ATTACHED_TO_SET_OP = True 1570 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1571 1572 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1573 NO_PAREN_IF_COMMANDS = True 1574 1575 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1576 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1577 1578 # Whether the `:` operator is used to extract a value from a VARIANT column 1579 COLON_IS_VARIANT_EXTRACT = False 1580 1581 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1582 # If this is True and '(' is not found, the keyword will be treated as an identifier 1583 VALUES_FOLLOWED_BY_PAREN = True 1584 1585 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1586 SUPPORTS_IMPLICIT_UNNEST = False 1587 1588 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1589 INTERVAL_SPANS = True 1590 1591 # Whether a PARTITION clause can follow a table reference 1592 SUPPORTS_PARTITION_SELECTION = False 1593 1594 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1595 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1596 1597 # Whether the 'AS' keyword is optional in the CTE definition syntax 1598 OPTIONAL_ALIAS_TOKEN_CTE = True 1599 1600 __slots__ = ( 1601 "error_level", 1602 "error_message_context", 1603 "max_errors", 1604 "dialect", 1605 "sql", 1606 "errors", 1607 "_tokens", 1608 "_index", 1609 "_curr", 1610 "_next", 1611 "_prev", 1612 "_prev_comments", 1613 ) 1614 1615 # Autofilled 1616 SHOW_TRIE: t.Dict = {} 1617 SET_TRIE: t.Dict = {} 1618 1619 def __init__( 1620 self, 1621 error_level: t.Optional[ErrorLevel] = None, 1622 error_message_context: int = 100, 1623 max_errors: int = 3, 1624 dialect: DialectType = None, 1625 ): 1626 from sqlglot.dialects import Dialect 1627 1628 self.error_level = error_level or ErrorLevel.IMMEDIATE 1629 self.error_message_context = error_message_context 1630 self.max_errors = max_errors 1631 self.dialect = Dialect.get_or_raise(dialect) 1632 self.reset() 1633 1634 def reset(self): 1635 self.sql = "" 1636 self.errors = [] 1637 self._tokens = [] 1638 self._index = 0 1639 self._curr = None 1640 self._next = None 1641 self._prev = None 1642 self._prev_comments = None 1643 1644 def parse( 1645 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1646 ) -> t.List[t.Optional[exp.Expression]]: 1647 """ 1648 Parses a list of tokens and returns a list of syntax trees, one tree 1649 per parsed SQL statement. 1650 1651 Args: 1652 raw_tokens: The list of tokens. 1653 sql: The original SQL string, used to produce helpful debug messages. 1654 1655 Returns: 1656 The list of the produced syntax trees. 1657 """ 1658 return self._parse( 1659 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1660 ) 1661 1662 def parse_into( 1663 self, 1664 expression_types: exp.IntoType, 1665 raw_tokens: t.List[Token], 1666 sql: t.Optional[str] = None, 1667 ) -> t.List[t.Optional[exp.Expression]]: 1668 """ 1669 Parses a list of tokens into a given Expression type. If a collection of Expression 1670 types is given instead, this method will try to parse the token list into each one 1671 of them, stopping at the first for which the parsing succeeds. 1672 1673 Args: 1674 expression_types: The expression type(s) to try and parse the token list into. 1675 raw_tokens: The list of tokens. 1676 sql: The original SQL string, used to produce helpful debug messages. 1677 1678 Returns: 1679 The target Expression. 1680 """ 1681 errors = [] 1682 for expression_type in ensure_list(expression_types): 1683 parser = self.EXPRESSION_PARSERS.get(expression_type) 1684 if not parser: 1685 raise TypeError(f"No parser registered for {expression_type}") 1686 1687 try: 1688 return self._parse(parser, raw_tokens, sql) 1689 except ParseError as e: 1690 e.errors[0]["into_expression"] = expression_type 1691 errors.append(e) 1692 1693 raise ParseError( 1694 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1695 errors=merge_errors(errors), 1696 ) from errors[-1] 1697 1698 def _parse( 1699 self, 1700 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1701 raw_tokens: t.List[Token], 1702 sql: t.Optional[str] = None, 1703 ) -> t.List[t.Optional[exp.Expression]]: 1704 self.reset() 1705 self.sql = sql or "" 1706 1707 total = len(raw_tokens) 1708 chunks: t.List[t.List[Token]] = [[]] 1709 1710 for i, token in enumerate(raw_tokens): 1711 if token.token_type == TokenType.SEMICOLON: 1712 if token.comments: 1713 chunks.append([token]) 1714 1715 if i < total - 1: 1716 chunks.append([]) 1717 else: 1718 chunks[-1].append(token) 1719 1720 expressions = [] 1721 1722 for tokens in chunks: 1723 self._index = -1 1724 self._tokens = tokens 1725 self._advance() 1726 1727 expressions.append(parse_method(self)) 1728 1729 if self._index < len(self._tokens): 1730 self.raise_error("Invalid expression / Unexpected token") 1731 1732 self.check_errors() 1733 1734 return expressions 1735 1736 def check_errors(self) -> None: 1737 """Logs or raises any found errors, depending on the chosen error level setting.""" 1738 if self.error_level == ErrorLevel.WARN: 1739 for error in self.errors: 1740 logger.error(str(error)) 1741 elif self.error_level == ErrorLevel.RAISE and self.errors: 1742 raise ParseError( 1743 concat_messages(self.errors, self.max_errors), 1744 errors=merge_errors(self.errors), 1745 ) 1746 1747 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1748 """ 1749 Appends an error in the list of recorded errors or raises it, depending on the chosen 1750 error level setting. 1751 """ 1752 token = token or self._curr or self._prev or Token.string("") 1753 start = token.start 1754 end = token.end + 1 1755 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1756 highlight = self.sql[start:end] 1757 end_context = self.sql[end : end + self.error_message_context] 1758 1759 error = ParseError.new( 1760 f"{message}. Line {token.line}, Col: {token.col}.\n" 1761 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1762 description=message, 1763 line=token.line, 1764 col=token.col, 1765 start_context=start_context, 1766 highlight=highlight, 1767 end_context=end_context, 1768 ) 1769 1770 if self.error_level == ErrorLevel.IMMEDIATE: 1771 raise error 1772 1773 self.errors.append(error) 1774 1775 def expression( 1776 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1777 ) -> E: 1778 """ 1779 Creates a new, validated Expression. 1780 1781 Args: 1782 exp_class: The expression class to instantiate. 1783 comments: An optional list of comments to attach to the expression. 1784 kwargs: The arguments to set for the expression along with their respective values. 1785 1786 Returns: 1787 The target expression. 1788 """ 1789 instance = exp_class(**kwargs) 1790 instance.add_comments(comments) if comments else self._add_comments(instance) 1791 return self.validate_expression(instance) 1792 1793 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1794 if expression and self._prev_comments: 1795 expression.add_comments(self._prev_comments) 1796 self._prev_comments = None 1797 1798 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1799 """ 1800 Validates an Expression, making sure that all its mandatory arguments are set. 1801 1802 Args: 1803 expression: The expression to validate. 1804 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1805 1806 Returns: 1807 The validated expression. 1808 """ 1809 if self.error_level != ErrorLevel.IGNORE: 1810 for error_message in expression.error_messages(args): 1811 self.raise_error(error_message) 1812 1813 return expression 1814 1815 def _find_sql(self, start: Token, end: Token) -> str: 1816 return self.sql[start.start : end.end + 1] 1817 1818 def _is_connected(self) -> bool: 1819 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1820 1821 def _advance(self, times: int = 1) -> None: 1822 self._index += times 1823 self._curr = seq_get(self._tokens, self._index) 1824 self._next = seq_get(self._tokens, self._index + 1) 1825 1826 if self._index > 0: 1827 self._prev = self._tokens[self._index - 1] 1828 self._prev_comments = self._prev.comments 1829 else: 1830 self._prev = None 1831 self._prev_comments = None 1832 1833 def _retreat(self, index: int) -> None: 1834 if index != self._index: 1835 self._advance(index - self._index) 1836 1837 def _warn_unsupported(self) -> None: 1838 if len(self._tokens) <= 1: 1839 return 1840 1841 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1842 # interested in emitting a warning for the one being currently processed. 1843 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1844 1845 logger.warning( 1846 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1847 ) 1848 1849 def _parse_command(self) -> exp.Command: 1850 self._warn_unsupported() 1851 return self.expression( 1852 exp.Command, 1853 comments=self._prev_comments, 1854 this=self._prev.text.upper(), 1855 expression=self._parse_string(), 1856 ) 1857 1858 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1859 """ 1860 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 1861 This behavior can be different depending on the uset-set ErrorLevel, so _try_parse aims to 1862 solve this by setting & resetting the parser state accordingly 1863 """ 1864 index = self._index 1865 error_level = self.error_level 1866 1867 self.error_level = ErrorLevel.IMMEDIATE 1868 try: 1869 this = parse_method() 1870 except ParseError: 1871 this = None 1872 finally: 1873 if not this or retreat: 1874 self._retreat(index) 1875 self.error_level = error_level 1876 1877 return this 1878 1879 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1880 start = self._prev 1881 exists = self._parse_exists() if allow_exists else None 1882 1883 self._match(TokenType.ON) 1884 1885 materialized = self._match_text_seq("MATERIALIZED") 1886 kind = self._match_set(self.CREATABLES) and self._prev 1887 if not kind: 1888 return self._parse_as_command(start) 1889 1890 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1891 this = self._parse_user_defined_function(kind=kind.token_type) 1892 elif kind.token_type == TokenType.TABLE: 1893 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1894 elif kind.token_type == TokenType.COLUMN: 1895 this = self._parse_column() 1896 else: 1897 this = self._parse_id_var() 1898 1899 self._match(TokenType.IS) 1900 1901 return self.expression( 1902 exp.Comment, 1903 this=this, 1904 kind=kind.text, 1905 expression=self._parse_string(), 1906 exists=exists, 1907 materialized=materialized, 1908 ) 1909 1910 def _parse_to_table( 1911 self, 1912 ) -> exp.ToTableProperty: 1913 table = self._parse_table_parts(schema=True) 1914 return self.expression(exp.ToTableProperty, this=table) 1915 1916 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1917 def _parse_ttl(self) -> exp.Expression: 1918 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1919 this = self._parse_bitwise() 1920 1921 if self._match_text_seq("DELETE"): 1922 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1923 if self._match_text_seq("RECOMPRESS"): 1924 return self.expression( 1925 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1926 ) 1927 if self._match_text_seq("TO", "DISK"): 1928 return self.expression( 1929 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1930 ) 1931 if self._match_text_seq("TO", "VOLUME"): 1932 return self.expression( 1933 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1934 ) 1935 1936 return this 1937 1938 expressions = self._parse_csv(_parse_ttl_action) 1939 where = self._parse_where() 1940 group = self._parse_group() 1941 1942 aggregates = None 1943 if group and self._match(TokenType.SET): 1944 aggregates = self._parse_csv(self._parse_set_item) 1945 1946 return self.expression( 1947 exp.MergeTreeTTL, 1948 expressions=expressions, 1949 where=where, 1950 group=group, 1951 aggregates=aggregates, 1952 ) 1953 1954 def _parse_statement(self) -> t.Optional[exp.Expression]: 1955 if self._curr is None: 1956 return None 1957 1958 if self._match_set(self.STATEMENT_PARSERS): 1959 comments = self._prev_comments 1960 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1961 stmt.add_comments(comments, prepend=True) 1962 return stmt 1963 1964 if self._match_set(self.dialect.tokenizer.COMMANDS): 1965 return self._parse_command() 1966 1967 expression = self._parse_expression() 1968 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1969 return self._parse_query_modifiers(expression) 1970 1971 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1972 start = self._prev 1973 temporary = self._match(TokenType.TEMPORARY) 1974 materialized = self._match_text_seq("MATERIALIZED") 1975 1976 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1977 if not kind: 1978 return self._parse_as_command(start) 1979 1980 concurrently = self._match_text_seq("CONCURRENTLY") 1981 if_exists = exists or self._parse_exists() 1982 1983 if kind == "COLUMN": 1984 this = self._parse_column() 1985 else: 1986 this = self._parse_table_parts( 1987 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1988 ) 1989 1990 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1991 1992 if self._match(TokenType.L_PAREN, advance=False): 1993 expressions = self._parse_wrapped_csv(self._parse_types) 1994 else: 1995 expressions = None 1996 1997 return self.expression( 1998 exp.Drop, 1999 exists=if_exists, 2000 this=this, 2001 expressions=expressions, 2002 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 2003 temporary=temporary, 2004 materialized=materialized, 2005 cascade=self._match_text_seq("CASCADE"), 2006 constraints=self._match_text_seq("CONSTRAINTS"), 2007 purge=self._match_text_seq("PURGE"), 2008 cluster=cluster, 2009 concurrently=concurrently, 2010 ) 2011 2012 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 2013 return ( 2014 self._match_text_seq("IF") 2015 and (not not_ or self._match(TokenType.NOT)) 2016 and self._match(TokenType.EXISTS) 2017 ) 2018 2019 def _parse_create(self) -> exp.Create | exp.Command: 2020 # Note: this can't be None because we've matched a statement parser 2021 start = self._prev 2022 2023 replace = ( 2024 start.token_type == TokenType.REPLACE 2025 or self._match_pair(TokenType.OR, TokenType.REPLACE) 2026 or self._match_pair(TokenType.OR, TokenType.ALTER) 2027 ) 2028 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 2029 2030 unique = self._match(TokenType.UNIQUE) 2031 2032 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 2033 clustered = True 2034 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 2035 "COLUMNSTORE" 2036 ): 2037 clustered = False 2038 else: 2039 clustered = None 2040 2041 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 2042 self._advance() 2043 2044 properties = None 2045 create_token = self._match_set(self.CREATABLES) and self._prev 2046 2047 if not create_token: 2048 # exp.Properties.Location.POST_CREATE 2049 properties = self._parse_properties() 2050 create_token = self._match_set(self.CREATABLES) and self._prev 2051 2052 if not properties or not create_token: 2053 return self._parse_as_command(start) 2054 2055 concurrently = self._match_text_seq("CONCURRENTLY") 2056 exists = self._parse_exists(not_=True) 2057 this = None 2058 expression: t.Optional[exp.Expression] = None 2059 indexes = None 2060 no_schema_binding = None 2061 begin = None 2062 end = None 2063 clone = None 2064 2065 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 2066 nonlocal properties 2067 if properties and temp_props: 2068 properties.expressions.extend(temp_props.expressions) 2069 elif temp_props: 2070 properties = temp_props 2071 2072 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2073 this = self._parse_user_defined_function(kind=create_token.token_type) 2074 2075 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 2076 extend_props(self._parse_properties()) 2077 2078 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 2079 extend_props(self._parse_properties()) 2080 2081 if not expression: 2082 if self._match(TokenType.COMMAND): 2083 expression = self._parse_as_command(self._prev) 2084 else: 2085 begin = self._match(TokenType.BEGIN) 2086 return_ = self._match_text_seq("RETURN") 2087 2088 if self._match(TokenType.STRING, advance=False): 2089 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 2090 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 2091 expression = self._parse_string() 2092 extend_props(self._parse_properties()) 2093 else: 2094 expression = self._parse_user_defined_function_expression() 2095 2096 end = self._match_text_seq("END") 2097 2098 if return_: 2099 expression = self.expression(exp.Return, this=expression) 2100 elif create_token.token_type == TokenType.INDEX: 2101 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 2102 if not self._match(TokenType.ON): 2103 index = self._parse_id_var() 2104 anonymous = False 2105 else: 2106 index = None 2107 anonymous = True 2108 2109 this = self._parse_index(index=index, anonymous=anonymous) 2110 elif create_token.token_type in self.DB_CREATABLES: 2111 table_parts = self._parse_table_parts( 2112 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2113 ) 2114 2115 # exp.Properties.Location.POST_NAME 2116 self._match(TokenType.COMMA) 2117 extend_props(self._parse_properties(before=True)) 2118 2119 this = self._parse_schema(this=table_parts) 2120 2121 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2122 extend_props(self._parse_properties()) 2123 2124 has_alias = self._match(TokenType.ALIAS) 2125 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2126 # exp.Properties.Location.POST_ALIAS 2127 extend_props(self._parse_properties()) 2128 2129 if create_token.token_type == TokenType.SEQUENCE: 2130 expression = self._parse_types() 2131 extend_props(self._parse_properties()) 2132 else: 2133 expression = self._parse_ddl_select() 2134 2135 # Some dialects also support using a table as an alias instead of a SELECT. 2136 # Here we fallback to this as an alternative. 2137 if not expression and has_alias: 2138 expression = self._try_parse(self._parse_table_parts) 2139 2140 if create_token.token_type == TokenType.TABLE: 2141 # exp.Properties.Location.POST_EXPRESSION 2142 extend_props(self._parse_properties()) 2143 2144 indexes = [] 2145 while True: 2146 index = self._parse_index() 2147 2148 # exp.Properties.Location.POST_INDEX 2149 extend_props(self._parse_properties()) 2150 if not index: 2151 break 2152 else: 2153 self._match(TokenType.COMMA) 2154 indexes.append(index) 2155 elif create_token.token_type == TokenType.VIEW: 2156 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2157 no_schema_binding = True 2158 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2159 extend_props(self._parse_properties()) 2160 2161 shallow = self._match_text_seq("SHALLOW") 2162 2163 if self._match_texts(self.CLONE_KEYWORDS): 2164 copy = self._prev.text.lower() == "copy" 2165 clone = self.expression( 2166 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2167 ) 2168 2169 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2170 return self._parse_as_command(start) 2171 2172 create_kind_text = create_token.text.upper() 2173 return self.expression( 2174 exp.Create, 2175 this=this, 2176 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2177 replace=replace, 2178 refresh=refresh, 2179 unique=unique, 2180 expression=expression, 2181 exists=exists, 2182 properties=properties, 2183 indexes=indexes, 2184 no_schema_binding=no_schema_binding, 2185 begin=begin, 2186 end=end, 2187 clone=clone, 2188 concurrently=concurrently, 2189 clustered=clustered, 2190 ) 2191 2192 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2193 seq = exp.SequenceProperties() 2194 2195 options = [] 2196 index = self._index 2197 2198 while self._curr: 2199 self._match(TokenType.COMMA) 2200 if self._match_text_seq("INCREMENT"): 2201 self._match_text_seq("BY") 2202 self._match_text_seq("=") 2203 seq.set("increment", self._parse_term()) 2204 elif self._match_text_seq("MINVALUE"): 2205 seq.set("minvalue", self._parse_term()) 2206 elif self._match_text_seq("MAXVALUE"): 2207 seq.set("maxvalue", self._parse_term()) 2208 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2209 self._match_text_seq("=") 2210 seq.set("start", self._parse_term()) 2211 elif self._match_text_seq("CACHE"): 2212 # T-SQL allows empty CACHE which is initialized dynamically 2213 seq.set("cache", self._parse_number() or True) 2214 elif self._match_text_seq("OWNED", "BY"): 2215 # "OWNED BY NONE" is the default 2216 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2217 else: 2218 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2219 if opt: 2220 options.append(opt) 2221 else: 2222 break 2223 2224 seq.set("options", options if options else None) 2225 return None if self._index == index else seq 2226 2227 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2228 # only used for teradata currently 2229 self._match(TokenType.COMMA) 2230 2231 kwargs = { 2232 "no": self._match_text_seq("NO"), 2233 "dual": self._match_text_seq("DUAL"), 2234 "before": self._match_text_seq("BEFORE"), 2235 "default": self._match_text_seq("DEFAULT"), 2236 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2237 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2238 "after": self._match_text_seq("AFTER"), 2239 "minimum": self._match_texts(("MIN", "MINIMUM")), 2240 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2241 } 2242 2243 if self._match_texts(self.PROPERTY_PARSERS): 2244 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2245 try: 2246 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2247 except TypeError: 2248 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2249 2250 return None 2251 2252 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2253 return self._parse_wrapped_csv(self._parse_property) 2254 2255 def _parse_property(self) -> t.Optional[exp.Expression]: 2256 if self._match_texts(self.PROPERTY_PARSERS): 2257 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2258 2259 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2260 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2261 2262 if self._match_text_seq("COMPOUND", "SORTKEY"): 2263 return self._parse_sortkey(compound=True) 2264 2265 if self._match_text_seq("SQL", "SECURITY"): 2266 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2267 2268 index = self._index 2269 key = self._parse_column() 2270 2271 if not self._match(TokenType.EQ): 2272 self._retreat(index) 2273 return self._parse_sequence_properties() 2274 2275 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2276 if isinstance(key, exp.Column): 2277 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2278 2279 value = self._parse_bitwise() or self._parse_var(any_token=True) 2280 2281 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2282 if isinstance(value, exp.Column): 2283 value = exp.var(value.name) 2284 2285 return self.expression(exp.Property, this=key, value=value) 2286 2287 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2288 if self._match_text_seq("BY"): 2289 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2290 2291 self._match(TokenType.ALIAS) 2292 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2293 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2294 2295 return self.expression( 2296 exp.FileFormatProperty, 2297 this=( 2298 self.expression( 2299 exp.InputOutputFormat, 2300 input_format=input_format, 2301 output_format=output_format, 2302 ) 2303 if input_format or output_format 2304 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2305 ), 2306 ) 2307 2308 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2309 field = self._parse_field() 2310 if isinstance(field, exp.Identifier) and not field.quoted: 2311 field = exp.var(field) 2312 2313 return field 2314 2315 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2316 self._match(TokenType.EQ) 2317 self._match(TokenType.ALIAS) 2318 2319 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2320 2321 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2322 properties = [] 2323 while True: 2324 if before: 2325 prop = self._parse_property_before() 2326 else: 2327 prop = self._parse_property() 2328 if not prop: 2329 break 2330 for p in ensure_list(prop): 2331 properties.append(p) 2332 2333 if properties: 2334 return self.expression(exp.Properties, expressions=properties) 2335 2336 return None 2337 2338 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2339 return self.expression( 2340 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2341 ) 2342 2343 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2344 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2345 security_specifier = self._prev.text.upper() 2346 return self.expression(exp.SecurityProperty, this=security_specifier) 2347 return None 2348 2349 def _parse_settings_property(self) -> exp.SettingsProperty: 2350 return self.expression( 2351 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2352 ) 2353 2354 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2355 if self._index >= 2: 2356 pre_volatile_token = self._tokens[self._index - 2] 2357 else: 2358 pre_volatile_token = None 2359 2360 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2361 return exp.VolatileProperty() 2362 2363 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2364 2365 def _parse_retention_period(self) -> exp.Var: 2366 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2367 number = self._parse_number() 2368 number_str = f"{number} " if number else "" 2369 unit = self._parse_var(any_token=True) 2370 return exp.var(f"{number_str}{unit}") 2371 2372 def _parse_system_versioning_property( 2373 self, with_: bool = False 2374 ) -> exp.WithSystemVersioningProperty: 2375 self._match(TokenType.EQ) 2376 prop = self.expression( 2377 exp.WithSystemVersioningProperty, 2378 **{ # type: ignore 2379 "on": True, 2380 "with": with_, 2381 }, 2382 ) 2383 2384 if self._match_text_seq("OFF"): 2385 prop.set("on", False) 2386 return prop 2387 2388 self._match(TokenType.ON) 2389 if self._match(TokenType.L_PAREN): 2390 while self._curr and not self._match(TokenType.R_PAREN): 2391 if self._match_text_seq("HISTORY_TABLE", "="): 2392 prop.set("this", self._parse_table_parts()) 2393 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2394 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2395 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2396 prop.set("retention_period", self._parse_retention_period()) 2397 2398 self._match(TokenType.COMMA) 2399 2400 return prop 2401 2402 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2403 self._match(TokenType.EQ) 2404 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2405 prop = self.expression(exp.DataDeletionProperty, on=on) 2406 2407 if self._match(TokenType.L_PAREN): 2408 while self._curr and not self._match(TokenType.R_PAREN): 2409 if self._match_text_seq("FILTER_COLUMN", "="): 2410 prop.set("filter_column", self._parse_column()) 2411 elif self._match_text_seq("RETENTION_PERIOD", "="): 2412 prop.set("retention_period", self._parse_retention_period()) 2413 2414 self._match(TokenType.COMMA) 2415 2416 return prop 2417 2418 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2419 kind = "HASH" 2420 expressions: t.Optional[t.List[exp.Expression]] = None 2421 if self._match_text_seq("BY", "HASH"): 2422 expressions = self._parse_wrapped_csv(self._parse_id_var) 2423 elif self._match_text_seq("BY", "RANDOM"): 2424 kind = "RANDOM" 2425 2426 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2427 buckets: t.Optional[exp.Expression] = None 2428 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2429 buckets = self._parse_number() 2430 2431 return self.expression( 2432 exp.DistributedByProperty, 2433 expressions=expressions, 2434 kind=kind, 2435 buckets=buckets, 2436 order=self._parse_order(), 2437 ) 2438 2439 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2440 self._match_text_seq("KEY") 2441 expressions = self._parse_wrapped_id_vars() 2442 return self.expression(expr_type, expressions=expressions) 2443 2444 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2445 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2446 prop = self._parse_system_versioning_property(with_=True) 2447 self._match_r_paren() 2448 return prop 2449 2450 if self._match(TokenType.L_PAREN, advance=False): 2451 return self._parse_wrapped_properties() 2452 2453 if self._match_text_seq("JOURNAL"): 2454 return self._parse_withjournaltable() 2455 2456 if self._match_texts(self.VIEW_ATTRIBUTES): 2457 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2458 2459 if self._match_text_seq("DATA"): 2460 return self._parse_withdata(no=False) 2461 elif self._match_text_seq("NO", "DATA"): 2462 return self._parse_withdata(no=True) 2463 2464 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2465 return self._parse_serde_properties(with_=True) 2466 2467 if self._match(TokenType.SCHEMA): 2468 return self.expression( 2469 exp.WithSchemaBindingProperty, 2470 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2471 ) 2472 2473 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2474 return self.expression( 2475 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2476 ) 2477 2478 if not self._next: 2479 return None 2480 2481 return self._parse_withisolatedloading() 2482 2483 def _parse_procedure_option(self) -> exp.Expression | None: 2484 if self._match_text_seq("EXECUTE", "AS"): 2485 return self.expression( 2486 exp.ExecuteAsProperty, 2487 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2488 or self._parse_string(), 2489 ) 2490 2491 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2492 2493 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2494 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2495 self._match(TokenType.EQ) 2496 2497 user = self._parse_id_var() 2498 self._match(TokenType.PARAMETER) 2499 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2500 2501 if not user or not host: 2502 return None 2503 2504 return exp.DefinerProperty(this=f"{user}@{host}") 2505 2506 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2507 self._match(TokenType.TABLE) 2508 self._match(TokenType.EQ) 2509 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2510 2511 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2512 return self.expression(exp.LogProperty, no=no) 2513 2514 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2515 return self.expression(exp.JournalProperty, **kwargs) 2516 2517 def _parse_checksum(self) -> exp.ChecksumProperty: 2518 self._match(TokenType.EQ) 2519 2520 on = None 2521 if self._match(TokenType.ON): 2522 on = True 2523 elif self._match_text_seq("OFF"): 2524 on = False 2525 2526 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2527 2528 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2529 return self.expression( 2530 exp.Cluster, 2531 expressions=( 2532 self._parse_wrapped_csv(self._parse_ordered) 2533 if wrapped 2534 else self._parse_csv(self._parse_ordered) 2535 ), 2536 ) 2537 2538 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2539 self._match_text_seq("BY") 2540 2541 self._match_l_paren() 2542 expressions = self._parse_csv(self._parse_column) 2543 self._match_r_paren() 2544 2545 if self._match_text_seq("SORTED", "BY"): 2546 self._match_l_paren() 2547 sorted_by = self._parse_csv(self._parse_ordered) 2548 self._match_r_paren() 2549 else: 2550 sorted_by = None 2551 2552 self._match(TokenType.INTO) 2553 buckets = self._parse_number() 2554 self._match_text_seq("BUCKETS") 2555 2556 return self.expression( 2557 exp.ClusteredByProperty, 2558 expressions=expressions, 2559 sorted_by=sorted_by, 2560 buckets=buckets, 2561 ) 2562 2563 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2564 if not self._match_text_seq("GRANTS"): 2565 self._retreat(self._index - 1) 2566 return None 2567 2568 return self.expression(exp.CopyGrantsProperty) 2569 2570 def _parse_freespace(self) -> exp.FreespaceProperty: 2571 self._match(TokenType.EQ) 2572 return self.expression( 2573 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2574 ) 2575 2576 def _parse_mergeblockratio( 2577 self, no: bool = False, default: bool = False 2578 ) -> exp.MergeBlockRatioProperty: 2579 if self._match(TokenType.EQ): 2580 return self.expression( 2581 exp.MergeBlockRatioProperty, 2582 this=self._parse_number(), 2583 percent=self._match(TokenType.PERCENT), 2584 ) 2585 2586 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2587 2588 def _parse_datablocksize( 2589 self, 2590 default: t.Optional[bool] = None, 2591 minimum: t.Optional[bool] = None, 2592 maximum: t.Optional[bool] = None, 2593 ) -> exp.DataBlocksizeProperty: 2594 self._match(TokenType.EQ) 2595 size = self._parse_number() 2596 2597 units = None 2598 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2599 units = self._prev.text 2600 2601 return self.expression( 2602 exp.DataBlocksizeProperty, 2603 size=size, 2604 units=units, 2605 default=default, 2606 minimum=minimum, 2607 maximum=maximum, 2608 ) 2609 2610 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2611 self._match(TokenType.EQ) 2612 always = self._match_text_seq("ALWAYS") 2613 manual = self._match_text_seq("MANUAL") 2614 never = self._match_text_seq("NEVER") 2615 default = self._match_text_seq("DEFAULT") 2616 2617 autotemp = None 2618 if self._match_text_seq("AUTOTEMP"): 2619 autotemp = self._parse_schema() 2620 2621 return self.expression( 2622 exp.BlockCompressionProperty, 2623 always=always, 2624 manual=manual, 2625 never=never, 2626 default=default, 2627 autotemp=autotemp, 2628 ) 2629 2630 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2631 index = self._index 2632 no = self._match_text_seq("NO") 2633 concurrent = self._match_text_seq("CONCURRENT") 2634 2635 if not self._match_text_seq("ISOLATED", "LOADING"): 2636 self._retreat(index) 2637 return None 2638 2639 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2640 return self.expression( 2641 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2642 ) 2643 2644 def _parse_locking(self) -> exp.LockingProperty: 2645 if self._match(TokenType.TABLE): 2646 kind = "TABLE" 2647 elif self._match(TokenType.VIEW): 2648 kind = "VIEW" 2649 elif self._match(TokenType.ROW): 2650 kind = "ROW" 2651 elif self._match_text_seq("DATABASE"): 2652 kind = "DATABASE" 2653 else: 2654 kind = None 2655 2656 if kind in ("DATABASE", "TABLE", "VIEW"): 2657 this = self._parse_table_parts() 2658 else: 2659 this = None 2660 2661 if self._match(TokenType.FOR): 2662 for_or_in = "FOR" 2663 elif self._match(TokenType.IN): 2664 for_or_in = "IN" 2665 else: 2666 for_or_in = None 2667 2668 if self._match_text_seq("ACCESS"): 2669 lock_type = "ACCESS" 2670 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2671 lock_type = "EXCLUSIVE" 2672 elif self._match_text_seq("SHARE"): 2673 lock_type = "SHARE" 2674 elif self._match_text_seq("READ"): 2675 lock_type = "READ" 2676 elif self._match_text_seq("WRITE"): 2677 lock_type = "WRITE" 2678 elif self._match_text_seq("CHECKSUM"): 2679 lock_type = "CHECKSUM" 2680 else: 2681 lock_type = None 2682 2683 override = self._match_text_seq("OVERRIDE") 2684 2685 return self.expression( 2686 exp.LockingProperty, 2687 this=this, 2688 kind=kind, 2689 for_or_in=for_or_in, 2690 lock_type=lock_type, 2691 override=override, 2692 ) 2693 2694 def _parse_partition_by(self) -> t.List[exp.Expression]: 2695 if self._match(TokenType.PARTITION_BY): 2696 return self._parse_csv(self._parse_assignment) 2697 return [] 2698 2699 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2700 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2701 if self._match_text_seq("MINVALUE"): 2702 return exp.var("MINVALUE") 2703 if self._match_text_seq("MAXVALUE"): 2704 return exp.var("MAXVALUE") 2705 return self._parse_bitwise() 2706 2707 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2708 expression = None 2709 from_expressions = None 2710 to_expressions = None 2711 2712 if self._match(TokenType.IN): 2713 this = self._parse_wrapped_csv(self._parse_bitwise) 2714 elif self._match(TokenType.FROM): 2715 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2716 self._match_text_seq("TO") 2717 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2718 elif self._match_text_seq("WITH", "(", "MODULUS"): 2719 this = self._parse_number() 2720 self._match_text_seq(",", "REMAINDER") 2721 expression = self._parse_number() 2722 self._match_r_paren() 2723 else: 2724 self.raise_error("Failed to parse partition bound spec.") 2725 2726 return self.expression( 2727 exp.PartitionBoundSpec, 2728 this=this, 2729 expression=expression, 2730 from_expressions=from_expressions, 2731 to_expressions=to_expressions, 2732 ) 2733 2734 # https://www.postgresql.org/docs/current/sql-createtable.html 2735 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2736 if not self._match_text_seq("OF"): 2737 self._retreat(self._index - 1) 2738 return None 2739 2740 this = self._parse_table(schema=True) 2741 2742 if self._match(TokenType.DEFAULT): 2743 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2744 elif self._match_text_seq("FOR", "VALUES"): 2745 expression = self._parse_partition_bound_spec() 2746 else: 2747 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2748 2749 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2750 2751 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2752 self._match(TokenType.EQ) 2753 return self.expression( 2754 exp.PartitionedByProperty, 2755 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2756 ) 2757 2758 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2759 if self._match_text_seq("AND", "STATISTICS"): 2760 statistics = True 2761 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2762 statistics = False 2763 else: 2764 statistics = None 2765 2766 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2767 2768 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2769 if self._match_text_seq("SQL"): 2770 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2771 return None 2772 2773 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2774 if self._match_text_seq("SQL", "DATA"): 2775 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2776 return None 2777 2778 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2779 if self._match_text_seq("PRIMARY", "INDEX"): 2780 return exp.NoPrimaryIndexProperty() 2781 if self._match_text_seq("SQL"): 2782 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2783 return None 2784 2785 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2786 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2787 return exp.OnCommitProperty() 2788 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2789 return exp.OnCommitProperty(delete=True) 2790 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2791 2792 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2793 if self._match_text_seq("SQL", "DATA"): 2794 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2795 return None 2796 2797 def _parse_distkey(self) -> exp.DistKeyProperty: 2798 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2799 2800 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2801 table = self._parse_table(schema=True) 2802 2803 options = [] 2804 while self._match_texts(("INCLUDING", "EXCLUDING")): 2805 this = self._prev.text.upper() 2806 2807 id_var = self._parse_id_var() 2808 if not id_var: 2809 return None 2810 2811 options.append( 2812 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2813 ) 2814 2815 return self.expression(exp.LikeProperty, this=table, expressions=options) 2816 2817 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2818 return self.expression( 2819 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2820 ) 2821 2822 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2823 self._match(TokenType.EQ) 2824 return self.expression( 2825 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2826 ) 2827 2828 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2829 self._match_text_seq("WITH", "CONNECTION") 2830 return self.expression( 2831 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2832 ) 2833 2834 def _parse_returns(self) -> exp.ReturnsProperty: 2835 value: t.Optional[exp.Expression] 2836 null = None 2837 is_table = self._match(TokenType.TABLE) 2838 2839 if is_table: 2840 if self._match(TokenType.LT): 2841 value = self.expression( 2842 exp.Schema, 2843 this="TABLE", 2844 expressions=self._parse_csv(self._parse_struct_types), 2845 ) 2846 if not self._match(TokenType.GT): 2847 self.raise_error("Expecting >") 2848 else: 2849 value = self._parse_schema(exp.var("TABLE")) 2850 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2851 null = True 2852 value = None 2853 else: 2854 value = self._parse_types() 2855 2856 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2857 2858 def _parse_describe(self) -> exp.Describe: 2859 kind = self._match_set(self.CREATABLES) and self._prev.text 2860 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2861 if self._match(TokenType.DOT): 2862 style = None 2863 self._retreat(self._index - 2) 2864 2865 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2866 2867 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2868 this = self._parse_statement() 2869 else: 2870 this = self._parse_table(schema=True) 2871 2872 properties = self._parse_properties() 2873 expressions = properties.expressions if properties else None 2874 partition = self._parse_partition() 2875 return self.expression( 2876 exp.Describe, 2877 this=this, 2878 style=style, 2879 kind=kind, 2880 expressions=expressions, 2881 partition=partition, 2882 format=format, 2883 ) 2884 2885 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2886 kind = self._prev.text.upper() 2887 expressions = [] 2888 2889 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2890 if self._match(TokenType.WHEN): 2891 expression = self._parse_disjunction() 2892 self._match(TokenType.THEN) 2893 else: 2894 expression = None 2895 2896 else_ = self._match(TokenType.ELSE) 2897 2898 if not self._match(TokenType.INTO): 2899 return None 2900 2901 return self.expression( 2902 exp.ConditionalInsert, 2903 this=self.expression( 2904 exp.Insert, 2905 this=self._parse_table(schema=True), 2906 expression=self._parse_derived_table_values(), 2907 ), 2908 expression=expression, 2909 else_=else_, 2910 ) 2911 2912 expression = parse_conditional_insert() 2913 while expression is not None: 2914 expressions.append(expression) 2915 expression = parse_conditional_insert() 2916 2917 return self.expression( 2918 exp.MultitableInserts, 2919 kind=kind, 2920 comments=comments, 2921 expressions=expressions, 2922 source=self._parse_table(), 2923 ) 2924 2925 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2926 comments = [] 2927 hint = self._parse_hint() 2928 overwrite = self._match(TokenType.OVERWRITE) 2929 ignore = self._match(TokenType.IGNORE) 2930 local = self._match_text_seq("LOCAL") 2931 alternative = None 2932 is_function = None 2933 2934 if self._match_text_seq("DIRECTORY"): 2935 this: t.Optional[exp.Expression] = self.expression( 2936 exp.Directory, 2937 this=self._parse_var_or_string(), 2938 local=local, 2939 row_format=self._parse_row_format(match_row=True), 2940 ) 2941 else: 2942 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2943 comments += ensure_list(self._prev_comments) 2944 return self._parse_multitable_inserts(comments) 2945 2946 if self._match(TokenType.OR): 2947 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2948 2949 self._match(TokenType.INTO) 2950 comments += ensure_list(self._prev_comments) 2951 self._match(TokenType.TABLE) 2952 is_function = self._match(TokenType.FUNCTION) 2953 2954 this = ( 2955 self._parse_table(schema=True, parse_partition=True) 2956 if not is_function 2957 else self._parse_function() 2958 ) 2959 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2960 this.set("alias", self._parse_table_alias()) 2961 2962 returning = self._parse_returning() 2963 2964 return self.expression( 2965 exp.Insert, 2966 comments=comments, 2967 hint=hint, 2968 is_function=is_function, 2969 this=this, 2970 stored=self._match_text_seq("STORED") and self._parse_stored(), 2971 by_name=self._match_text_seq("BY", "NAME"), 2972 exists=self._parse_exists(), 2973 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2974 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2975 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2976 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2977 conflict=self._parse_on_conflict(), 2978 returning=returning or self._parse_returning(), 2979 overwrite=overwrite, 2980 alternative=alternative, 2981 ignore=ignore, 2982 source=self._match(TokenType.TABLE) and self._parse_table(), 2983 ) 2984 2985 def _parse_kill(self) -> exp.Kill: 2986 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2987 2988 return self.expression( 2989 exp.Kill, 2990 this=self._parse_primary(), 2991 kind=kind, 2992 ) 2993 2994 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2995 conflict = self._match_text_seq("ON", "CONFLICT") 2996 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2997 2998 if not conflict and not duplicate: 2999 return None 3000 3001 conflict_keys = None 3002 constraint = None 3003 3004 if conflict: 3005 if self._match_text_seq("ON", "CONSTRAINT"): 3006 constraint = self._parse_id_var() 3007 elif self._match(TokenType.L_PAREN): 3008 conflict_keys = self._parse_csv(self._parse_id_var) 3009 self._match_r_paren() 3010 3011 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 3012 if self._prev.token_type == TokenType.UPDATE: 3013 self._match(TokenType.SET) 3014 expressions = self._parse_csv(self._parse_equality) 3015 else: 3016 expressions = None 3017 3018 return self.expression( 3019 exp.OnConflict, 3020 duplicate=duplicate, 3021 expressions=expressions, 3022 action=action, 3023 conflict_keys=conflict_keys, 3024 constraint=constraint, 3025 where=self._parse_where(), 3026 ) 3027 3028 def _parse_returning(self) -> t.Optional[exp.Returning]: 3029 if not self._match(TokenType.RETURNING): 3030 return None 3031 return self.expression( 3032 exp.Returning, 3033 expressions=self._parse_csv(self._parse_expression), 3034 into=self._match(TokenType.INTO) and self._parse_table_part(), 3035 ) 3036 3037 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3038 if not self._match(TokenType.FORMAT): 3039 return None 3040 return self._parse_row_format() 3041 3042 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 3043 index = self._index 3044 with_ = with_ or self._match_text_seq("WITH") 3045 3046 if not self._match(TokenType.SERDE_PROPERTIES): 3047 self._retreat(index) 3048 return None 3049 return self.expression( 3050 exp.SerdeProperties, 3051 **{ # type: ignore 3052 "expressions": self._parse_wrapped_properties(), 3053 "with": with_, 3054 }, 3055 ) 3056 3057 def _parse_row_format( 3058 self, match_row: bool = False 3059 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3060 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 3061 return None 3062 3063 if self._match_text_seq("SERDE"): 3064 this = self._parse_string() 3065 3066 serde_properties = self._parse_serde_properties() 3067 3068 return self.expression( 3069 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 3070 ) 3071 3072 self._match_text_seq("DELIMITED") 3073 3074 kwargs = {} 3075 3076 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3077 kwargs["fields"] = self._parse_string() 3078 if self._match_text_seq("ESCAPED", "BY"): 3079 kwargs["escaped"] = self._parse_string() 3080 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3081 kwargs["collection_items"] = self._parse_string() 3082 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3083 kwargs["map_keys"] = self._parse_string() 3084 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3085 kwargs["lines"] = self._parse_string() 3086 if self._match_text_seq("NULL", "DEFINED", "AS"): 3087 kwargs["null"] = self._parse_string() 3088 3089 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3090 3091 def _parse_load(self) -> exp.LoadData | exp.Command: 3092 if self._match_text_seq("DATA"): 3093 local = self._match_text_seq("LOCAL") 3094 self._match_text_seq("INPATH") 3095 inpath = self._parse_string() 3096 overwrite = self._match(TokenType.OVERWRITE) 3097 self._match_pair(TokenType.INTO, TokenType.TABLE) 3098 3099 return self.expression( 3100 exp.LoadData, 3101 this=self._parse_table(schema=True), 3102 local=local, 3103 overwrite=overwrite, 3104 inpath=inpath, 3105 partition=self._parse_partition(), 3106 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3107 serde=self._match_text_seq("SERDE") and self._parse_string(), 3108 ) 3109 return self._parse_as_command(self._prev) 3110 3111 def _parse_delete(self) -> exp.Delete: 3112 # This handles MySQL's "Multiple-Table Syntax" 3113 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3114 tables = None 3115 if not self._match(TokenType.FROM, advance=False): 3116 tables = self._parse_csv(self._parse_table) or None 3117 3118 returning = self._parse_returning() 3119 3120 return self.expression( 3121 exp.Delete, 3122 tables=tables, 3123 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3124 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3125 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3126 where=self._parse_where(), 3127 returning=returning or self._parse_returning(), 3128 limit=self._parse_limit(), 3129 ) 3130 3131 def _parse_update(self) -> exp.Update: 3132 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3133 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3134 returning = self._parse_returning() 3135 return self.expression( 3136 exp.Update, 3137 **{ # type: ignore 3138 "this": this, 3139 "expressions": expressions, 3140 "from": self._parse_from(joins=True), 3141 "where": self._parse_where(), 3142 "returning": returning or self._parse_returning(), 3143 "order": self._parse_order(), 3144 "limit": self._parse_limit(), 3145 }, 3146 ) 3147 3148 def _parse_use(self) -> exp.Use: 3149 return self.expression( 3150 exp.Use, 3151 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3152 this=self._parse_table(schema=False), 3153 ) 3154 3155 def _parse_uncache(self) -> exp.Uncache: 3156 if not self._match(TokenType.TABLE): 3157 self.raise_error("Expecting TABLE after UNCACHE") 3158 3159 return self.expression( 3160 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3161 ) 3162 3163 def _parse_cache(self) -> exp.Cache: 3164 lazy = self._match_text_seq("LAZY") 3165 self._match(TokenType.TABLE) 3166 table = self._parse_table(schema=True) 3167 3168 options = [] 3169 if self._match_text_seq("OPTIONS"): 3170 self._match_l_paren() 3171 k = self._parse_string() 3172 self._match(TokenType.EQ) 3173 v = self._parse_string() 3174 options = [k, v] 3175 self._match_r_paren() 3176 3177 self._match(TokenType.ALIAS) 3178 return self.expression( 3179 exp.Cache, 3180 this=table, 3181 lazy=lazy, 3182 options=options, 3183 expression=self._parse_select(nested=True), 3184 ) 3185 3186 def _parse_partition(self) -> t.Optional[exp.Partition]: 3187 if not self._match_texts(self.PARTITION_KEYWORDS): 3188 return None 3189 3190 return self.expression( 3191 exp.Partition, 3192 subpartition=self._prev.text.upper() == "SUBPARTITION", 3193 expressions=self._parse_wrapped_csv(self._parse_assignment), 3194 ) 3195 3196 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3197 def _parse_value_expression() -> t.Optional[exp.Expression]: 3198 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3199 return exp.var(self._prev.text.upper()) 3200 return self._parse_expression() 3201 3202 if self._match(TokenType.L_PAREN): 3203 expressions = self._parse_csv(_parse_value_expression) 3204 self._match_r_paren() 3205 return self.expression(exp.Tuple, expressions=expressions) 3206 3207 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3208 expression = self._parse_expression() 3209 if expression: 3210 return self.expression(exp.Tuple, expressions=[expression]) 3211 return None 3212 3213 def _parse_projections(self) -> t.List[exp.Expression]: 3214 return self._parse_expressions() 3215 3216 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3217 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3218 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3219 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3220 ) 3221 elif self._match(TokenType.FROM): 3222 from_ = self._parse_from(skip_from_token=True) 3223 # Support parentheses for duckdb FROM-first syntax 3224 select = self._parse_select() 3225 if select: 3226 select.set("from", from_) 3227 this = select 3228 else: 3229 this = exp.select("*").from_(t.cast(exp.From, from_)) 3230 else: 3231 this = ( 3232 self._parse_table() 3233 if table 3234 else self._parse_select(nested=True, parse_set_operation=False) 3235 ) 3236 3237 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3238 # in case a modifier (e.g. join) is following 3239 if table and isinstance(this, exp.Values) and this.alias: 3240 alias = this.args["alias"].pop() 3241 this = exp.Table(this=this, alias=alias) 3242 3243 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3244 3245 return this 3246 3247 def _parse_select( 3248 self, 3249 nested: bool = False, 3250 table: bool = False, 3251 parse_subquery_alias: bool = True, 3252 parse_set_operation: bool = True, 3253 ) -> t.Optional[exp.Expression]: 3254 cte = self._parse_with() 3255 3256 if cte: 3257 this = self._parse_statement() 3258 3259 if not this: 3260 self.raise_error("Failed to parse any statement following CTE") 3261 return cte 3262 3263 if "with" in this.arg_types: 3264 this.set("with", cte) 3265 else: 3266 self.raise_error(f"{this.key} does not support CTE") 3267 this = cte 3268 3269 return this 3270 3271 # duckdb supports leading with FROM x 3272 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 3273 3274 if self._match(TokenType.SELECT): 3275 comments = self._prev_comments 3276 3277 hint = self._parse_hint() 3278 3279 if self._next and not self._next.token_type == TokenType.DOT: 3280 all_ = self._match(TokenType.ALL) 3281 distinct = self._match_set(self.DISTINCT_TOKENS) 3282 else: 3283 all_, distinct = None, None 3284 3285 kind = ( 3286 self._match(TokenType.ALIAS) 3287 and self._match_texts(("STRUCT", "VALUE")) 3288 and self._prev.text.upper() 3289 ) 3290 3291 if distinct: 3292 distinct = self.expression( 3293 exp.Distinct, 3294 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3295 ) 3296 3297 if all_ and distinct: 3298 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3299 3300 operation_modifiers = [] 3301 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3302 operation_modifiers.append(exp.var(self._prev.text.upper())) 3303 3304 limit = self._parse_limit(top=True) 3305 projections = self._parse_projections() 3306 3307 this = self.expression( 3308 exp.Select, 3309 kind=kind, 3310 hint=hint, 3311 distinct=distinct, 3312 expressions=projections, 3313 limit=limit, 3314 operation_modifiers=operation_modifiers or None, 3315 ) 3316 this.comments = comments 3317 3318 into = self._parse_into() 3319 if into: 3320 this.set("into", into) 3321 3322 if not from_: 3323 from_ = self._parse_from() 3324 3325 if from_: 3326 this.set("from", from_) 3327 3328 this = self._parse_query_modifiers(this) 3329 elif (table or nested) and self._match(TokenType.L_PAREN): 3330 this = self._parse_wrapped_select(table=table) 3331 3332 # We return early here so that the UNION isn't attached to the subquery by the 3333 # following call to _parse_set_operations, but instead becomes the parent node 3334 self._match_r_paren() 3335 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3336 elif self._match(TokenType.VALUES, advance=False): 3337 this = self._parse_derived_table_values() 3338 elif from_: 3339 this = exp.select("*").from_(from_.this, copy=False) 3340 if self._match(TokenType.PIPE_GT, advance=False): 3341 return self._parse_pipe_syntax_query(this) 3342 elif self._match(TokenType.SUMMARIZE): 3343 table = self._match(TokenType.TABLE) 3344 this = self._parse_select() or self._parse_string() or self._parse_table() 3345 return self.expression(exp.Summarize, this=this, table=table) 3346 elif self._match(TokenType.DESCRIBE): 3347 this = self._parse_describe() 3348 elif self._match_text_seq("STREAM"): 3349 this = self._parse_function() 3350 if this: 3351 this = self.expression(exp.Stream, this=this) 3352 else: 3353 self._retreat(self._index - 1) 3354 else: 3355 this = None 3356 3357 return self._parse_set_operations(this) if parse_set_operation else this 3358 3359 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3360 self._match_text_seq("SEARCH") 3361 3362 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3363 3364 if not kind: 3365 return None 3366 3367 self._match_text_seq("FIRST", "BY") 3368 3369 return self.expression( 3370 exp.RecursiveWithSearch, 3371 kind=kind, 3372 this=self._parse_id_var(), 3373 expression=self._match_text_seq("SET") and self._parse_id_var(), 3374 using=self._match_text_seq("USING") and self._parse_id_var(), 3375 ) 3376 3377 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3378 if not skip_with_token and not self._match(TokenType.WITH): 3379 return None 3380 3381 comments = self._prev_comments 3382 recursive = self._match(TokenType.RECURSIVE) 3383 3384 last_comments = None 3385 expressions = [] 3386 while True: 3387 cte = self._parse_cte() 3388 if isinstance(cte, exp.CTE): 3389 expressions.append(cte) 3390 if last_comments: 3391 cte.add_comments(last_comments) 3392 3393 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3394 break 3395 else: 3396 self._match(TokenType.WITH) 3397 3398 last_comments = self._prev_comments 3399 3400 return self.expression( 3401 exp.With, 3402 comments=comments, 3403 expressions=expressions, 3404 recursive=recursive, 3405 search=self._parse_recursive_with_search(), 3406 ) 3407 3408 def _parse_cte(self) -> t.Optional[exp.CTE]: 3409 index = self._index 3410 3411 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3412 if not alias or not alias.this: 3413 self.raise_error("Expected CTE to have alias") 3414 3415 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3416 self._retreat(index) 3417 return None 3418 3419 comments = self._prev_comments 3420 3421 if self._match_text_seq("NOT", "MATERIALIZED"): 3422 materialized = False 3423 elif self._match_text_seq("MATERIALIZED"): 3424 materialized = True 3425 else: 3426 materialized = None 3427 3428 cte = self.expression( 3429 exp.CTE, 3430 this=self._parse_wrapped(self._parse_statement), 3431 alias=alias, 3432 materialized=materialized, 3433 comments=comments, 3434 ) 3435 3436 if isinstance(cte.this, exp.Values): 3437 cte.set("this", exp.select("*").from_(exp.alias_(cte.this, "_values", table=True))) 3438 3439 return cte 3440 3441 def _parse_table_alias( 3442 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3443 ) -> t.Optional[exp.TableAlias]: 3444 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3445 # so this section tries to parse the clause version and if it fails, it treats the token 3446 # as an identifier (alias) 3447 if self._can_parse_limit_or_offset(): 3448 return None 3449 3450 any_token = self._match(TokenType.ALIAS) 3451 alias = ( 3452 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3453 or self._parse_string_as_identifier() 3454 ) 3455 3456 index = self._index 3457 if self._match(TokenType.L_PAREN): 3458 columns = self._parse_csv(self._parse_function_parameter) 3459 self._match_r_paren() if columns else self._retreat(index) 3460 else: 3461 columns = None 3462 3463 if not alias and not columns: 3464 return None 3465 3466 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3467 3468 # We bubble up comments from the Identifier to the TableAlias 3469 if isinstance(alias, exp.Identifier): 3470 table_alias.add_comments(alias.pop_comments()) 3471 3472 return table_alias 3473 3474 def _parse_subquery( 3475 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3476 ) -> t.Optional[exp.Subquery]: 3477 if not this: 3478 return None 3479 3480 return self.expression( 3481 exp.Subquery, 3482 this=this, 3483 pivots=self._parse_pivots(), 3484 alias=self._parse_table_alias() if parse_alias else None, 3485 sample=self._parse_table_sample(), 3486 ) 3487 3488 def _implicit_unnests_to_explicit(self, this: E) -> E: 3489 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3490 3491 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3492 for i, join in enumerate(this.args.get("joins") or []): 3493 table = join.this 3494 normalized_table = table.copy() 3495 normalized_table.meta["maybe_column"] = True 3496 normalized_table = _norm(normalized_table, dialect=self.dialect) 3497 3498 if isinstance(table, exp.Table) and not join.args.get("on"): 3499 if normalized_table.parts[0].name in refs: 3500 table_as_column = table.to_column() 3501 unnest = exp.Unnest(expressions=[table_as_column]) 3502 3503 # Table.to_column creates a parent Alias node that we want to convert to 3504 # a TableAlias and attach to the Unnest, so it matches the parser's output 3505 if isinstance(table.args.get("alias"), exp.TableAlias): 3506 table_as_column.replace(table_as_column.this) 3507 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3508 3509 table.replace(unnest) 3510 3511 refs.add(normalized_table.alias_or_name) 3512 3513 return this 3514 3515 def _parse_query_modifiers( 3516 self, this: t.Optional[exp.Expression] 3517 ) -> t.Optional[exp.Expression]: 3518 if isinstance(this, self.MODIFIABLES): 3519 for join in self._parse_joins(): 3520 this.append("joins", join) 3521 for lateral in iter(self._parse_lateral, None): 3522 this.append("laterals", lateral) 3523 3524 while True: 3525 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3526 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3527 key, expression = parser(self) 3528 3529 if expression: 3530 this.set(key, expression) 3531 if key == "limit": 3532 offset = expression.args.pop("offset", None) 3533 3534 if offset: 3535 offset = exp.Offset(expression=offset) 3536 this.set("offset", offset) 3537 3538 limit_by_expressions = expression.expressions 3539 expression.set("expressions", None) 3540 offset.set("expressions", limit_by_expressions) 3541 continue 3542 break 3543 3544 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3545 this = self._implicit_unnests_to_explicit(this) 3546 3547 return this 3548 3549 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3550 start = self._curr 3551 while self._curr: 3552 self._advance() 3553 3554 end = self._tokens[self._index - 1] 3555 return exp.Hint(expressions=[self._find_sql(start, end)]) 3556 3557 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3558 return self._parse_function_call() 3559 3560 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3561 start_index = self._index 3562 should_fallback_to_string = False 3563 3564 hints = [] 3565 try: 3566 for hint in iter( 3567 lambda: self._parse_csv( 3568 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3569 ), 3570 [], 3571 ): 3572 hints.extend(hint) 3573 except ParseError: 3574 should_fallback_to_string = True 3575 3576 if should_fallback_to_string or self._curr: 3577 self._retreat(start_index) 3578 return self._parse_hint_fallback_to_string() 3579 3580 return self.expression(exp.Hint, expressions=hints) 3581 3582 def _parse_hint(self) -> t.Optional[exp.Hint]: 3583 if self._match(TokenType.HINT) and self._prev_comments: 3584 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3585 3586 return None 3587 3588 def _parse_into(self) -> t.Optional[exp.Into]: 3589 if not self._match(TokenType.INTO): 3590 return None 3591 3592 temp = self._match(TokenType.TEMPORARY) 3593 unlogged = self._match_text_seq("UNLOGGED") 3594 self._match(TokenType.TABLE) 3595 3596 return self.expression( 3597 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3598 ) 3599 3600 def _parse_from( 3601 self, joins: bool = False, skip_from_token: bool = False 3602 ) -> t.Optional[exp.From]: 3603 if not skip_from_token and not self._match(TokenType.FROM): 3604 return None 3605 3606 return self.expression( 3607 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3608 ) 3609 3610 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3611 return self.expression( 3612 exp.MatchRecognizeMeasure, 3613 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3614 this=self._parse_expression(), 3615 ) 3616 3617 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3618 if not self._match(TokenType.MATCH_RECOGNIZE): 3619 return None 3620 3621 self._match_l_paren() 3622 3623 partition = self._parse_partition_by() 3624 order = self._parse_order() 3625 3626 measures = ( 3627 self._parse_csv(self._parse_match_recognize_measure) 3628 if self._match_text_seq("MEASURES") 3629 else None 3630 ) 3631 3632 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3633 rows = exp.var("ONE ROW PER MATCH") 3634 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3635 text = "ALL ROWS PER MATCH" 3636 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3637 text += " SHOW EMPTY MATCHES" 3638 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3639 text += " OMIT EMPTY MATCHES" 3640 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3641 text += " WITH UNMATCHED ROWS" 3642 rows = exp.var(text) 3643 else: 3644 rows = None 3645 3646 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3647 text = "AFTER MATCH SKIP" 3648 if self._match_text_seq("PAST", "LAST", "ROW"): 3649 text += " PAST LAST ROW" 3650 elif self._match_text_seq("TO", "NEXT", "ROW"): 3651 text += " TO NEXT ROW" 3652 elif self._match_text_seq("TO", "FIRST"): 3653 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3654 elif self._match_text_seq("TO", "LAST"): 3655 text += f" TO LAST {self._advance_any().text}" # type: ignore 3656 after = exp.var(text) 3657 else: 3658 after = None 3659 3660 if self._match_text_seq("PATTERN"): 3661 self._match_l_paren() 3662 3663 if not self._curr: 3664 self.raise_error("Expecting )", self._curr) 3665 3666 paren = 1 3667 start = self._curr 3668 3669 while self._curr and paren > 0: 3670 if self._curr.token_type == TokenType.L_PAREN: 3671 paren += 1 3672 if self._curr.token_type == TokenType.R_PAREN: 3673 paren -= 1 3674 3675 end = self._prev 3676 self._advance() 3677 3678 if paren > 0: 3679 self.raise_error("Expecting )", self._curr) 3680 3681 pattern = exp.var(self._find_sql(start, end)) 3682 else: 3683 pattern = None 3684 3685 define = ( 3686 self._parse_csv(self._parse_name_as_expression) 3687 if self._match_text_seq("DEFINE") 3688 else None 3689 ) 3690 3691 self._match_r_paren() 3692 3693 return self.expression( 3694 exp.MatchRecognize, 3695 partition_by=partition, 3696 order=order, 3697 measures=measures, 3698 rows=rows, 3699 after=after, 3700 pattern=pattern, 3701 define=define, 3702 alias=self._parse_table_alias(), 3703 ) 3704 3705 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3706 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3707 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3708 cross_apply = False 3709 3710 if cross_apply is not None: 3711 this = self._parse_select(table=True) 3712 view = None 3713 outer = None 3714 elif self._match(TokenType.LATERAL): 3715 this = self._parse_select(table=True) 3716 view = self._match(TokenType.VIEW) 3717 outer = self._match(TokenType.OUTER) 3718 else: 3719 return None 3720 3721 if not this: 3722 this = ( 3723 self._parse_unnest() 3724 or self._parse_function() 3725 or self._parse_id_var(any_token=False) 3726 ) 3727 3728 while self._match(TokenType.DOT): 3729 this = exp.Dot( 3730 this=this, 3731 expression=self._parse_function() or self._parse_id_var(any_token=False), 3732 ) 3733 3734 ordinality: t.Optional[bool] = None 3735 3736 if view: 3737 table = self._parse_id_var(any_token=False) 3738 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3739 table_alias: t.Optional[exp.TableAlias] = self.expression( 3740 exp.TableAlias, this=table, columns=columns 3741 ) 3742 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3743 # We move the alias from the lateral's child node to the lateral itself 3744 table_alias = this.args["alias"].pop() 3745 else: 3746 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3747 table_alias = self._parse_table_alias() 3748 3749 return self.expression( 3750 exp.Lateral, 3751 this=this, 3752 view=view, 3753 outer=outer, 3754 alias=table_alias, 3755 cross_apply=cross_apply, 3756 ordinality=ordinality, 3757 ) 3758 3759 def _parse_join_parts( 3760 self, 3761 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3762 return ( 3763 self._match_set(self.JOIN_METHODS) and self._prev, 3764 self._match_set(self.JOIN_SIDES) and self._prev, 3765 self._match_set(self.JOIN_KINDS) and self._prev, 3766 ) 3767 3768 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3769 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3770 this = self._parse_column() 3771 if isinstance(this, exp.Column): 3772 return this.this 3773 return this 3774 3775 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3776 3777 def _parse_join( 3778 self, skip_join_token: bool = False, parse_bracket: bool = False 3779 ) -> t.Optional[exp.Join]: 3780 if self._match(TokenType.COMMA): 3781 table = self._try_parse(self._parse_table) 3782 if table: 3783 return self.expression(exp.Join, this=table) 3784 return None 3785 3786 index = self._index 3787 method, side, kind = self._parse_join_parts() 3788 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3789 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3790 3791 if not skip_join_token and not join: 3792 self._retreat(index) 3793 kind = None 3794 method = None 3795 side = None 3796 3797 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3798 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3799 3800 if not skip_join_token and not join and not outer_apply and not cross_apply: 3801 return None 3802 3803 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3804 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3805 kwargs["expressions"] = self._parse_csv( 3806 lambda: self._parse_table(parse_bracket=parse_bracket) 3807 ) 3808 3809 if method: 3810 kwargs["method"] = method.text 3811 if side: 3812 kwargs["side"] = side.text 3813 if kind: 3814 kwargs["kind"] = kind.text 3815 if hint: 3816 kwargs["hint"] = hint 3817 3818 if self._match(TokenType.MATCH_CONDITION): 3819 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3820 3821 if self._match(TokenType.ON): 3822 kwargs["on"] = self._parse_assignment() 3823 elif self._match(TokenType.USING): 3824 kwargs["using"] = self._parse_using_identifiers() 3825 elif ( 3826 not (outer_apply or cross_apply) 3827 and not isinstance(kwargs["this"], exp.Unnest) 3828 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3829 ): 3830 index = self._index 3831 joins: t.Optional[list] = list(self._parse_joins()) 3832 3833 if joins and self._match(TokenType.ON): 3834 kwargs["on"] = self._parse_assignment() 3835 elif joins and self._match(TokenType.USING): 3836 kwargs["using"] = self._parse_using_identifiers() 3837 else: 3838 joins = None 3839 self._retreat(index) 3840 3841 kwargs["this"].set("joins", joins if joins else None) 3842 3843 kwargs["pivots"] = self._parse_pivots() 3844 3845 comments = [c for token in (method, side, kind) if token for c in token.comments] 3846 return self.expression(exp.Join, comments=comments, **kwargs) 3847 3848 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3849 this = self._parse_assignment() 3850 3851 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3852 return this 3853 3854 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3855 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3856 3857 return this 3858 3859 def _parse_index_params(self) -> exp.IndexParameters: 3860 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3861 3862 if self._match(TokenType.L_PAREN, advance=False): 3863 columns = self._parse_wrapped_csv(self._parse_with_operator) 3864 else: 3865 columns = None 3866 3867 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3868 partition_by = self._parse_partition_by() 3869 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3870 tablespace = ( 3871 self._parse_var(any_token=True) 3872 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3873 else None 3874 ) 3875 where = self._parse_where() 3876 3877 on = self._parse_field() if self._match(TokenType.ON) else None 3878 3879 return self.expression( 3880 exp.IndexParameters, 3881 using=using, 3882 columns=columns, 3883 include=include, 3884 partition_by=partition_by, 3885 where=where, 3886 with_storage=with_storage, 3887 tablespace=tablespace, 3888 on=on, 3889 ) 3890 3891 def _parse_index( 3892 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3893 ) -> t.Optional[exp.Index]: 3894 if index or anonymous: 3895 unique = None 3896 primary = None 3897 amp = None 3898 3899 self._match(TokenType.ON) 3900 self._match(TokenType.TABLE) # hive 3901 table = self._parse_table_parts(schema=True) 3902 else: 3903 unique = self._match(TokenType.UNIQUE) 3904 primary = self._match_text_seq("PRIMARY") 3905 amp = self._match_text_seq("AMP") 3906 3907 if not self._match(TokenType.INDEX): 3908 return None 3909 3910 index = self._parse_id_var() 3911 table = None 3912 3913 params = self._parse_index_params() 3914 3915 return self.expression( 3916 exp.Index, 3917 this=index, 3918 table=table, 3919 unique=unique, 3920 primary=primary, 3921 amp=amp, 3922 params=params, 3923 ) 3924 3925 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3926 hints: t.List[exp.Expression] = [] 3927 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3928 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3929 hints.append( 3930 self.expression( 3931 exp.WithTableHint, 3932 expressions=self._parse_csv( 3933 lambda: self._parse_function() or self._parse_var(any_token=True) 3934 ), 3935 ) 3936 ) 3937 self._match_r_paren() 3938 else: 3939 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3940 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3941 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3942 3943 self._match_set((TokenType.INDEX, TokenType.KEY)) 3944 if self._match(TokenType.FOR): 3945 hint.set("target", self._advance_any() and self._prev.text.upper()) 3946 3947 hint.set("expressions", self._parse_wrapped_id_vars()) 3948 hints.append(hint) 3949 3950 return hints or None 3951 3952 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3953 return ( 3954 (not schema and self._parse_function(optional_parens=False)) 3955 or self._parse_id_var(any_token=False) 3956 or self._parse_string_as_identifier() 3957 or self._parse_placeholder() 3958 ) 3959 3960 def _parse_table_parts( 3961 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3962 ) -> exp.Table: 3963 catalog = None 3964 db = None 3965 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3966 3967 while self._match(TokenType.DOT): 3968 if catalog: 3969 # This allows nesting the table in arbitrarily many dot expressions if needed 3970 table = self.expression( 3971 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3972 ) 3973 else: 3974 catalog = db 3975 db = table 3976 # "" used for tsql FROM a..b case 3977 table = self._parse_table_part(schema=schema) or "" 3978 3979 if ( 3980 wildcard 3981 and self._is_connected() 3982 and (isinstance(table, exp.Identifier) or not table) 3983 and self._match(TokenType.STAR) 3984 ): 3985 if isinstance(table, exp.Identifier): 3986 table.args["this"] += "*" 3987 else: 3988 table = exp.Identifier(this="*") 3989 3990 # We bubble up comments from the Identifier to the Table 3991 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3992 3993 if is_db_reference: 3994 catalog = db 3995 db = table 3996 table = None 3997 3998 if not table and not is_db_reference: 3999 self.raise_error(f"Expected table name but got {self._curr}") 4000 if not db and is_db_reference: 4001 self.raise_error(f"Expected database name but got {self._curr}") 4002 4003 table = self.expression( 4004 exp.Table, 4005 comments=comments, 4006 this=table, 4007 db=db, 4008 catalog=catalog, 4009 ) 4010 4011 changes = self._parse_changes() 4012 if changes: 4013 table.set("changes", changes) 4014 4015 at_before = self._parse_historical_data() 4016 if at_before: 4017 table.set("when", at_before) 4018 4019 pivots = self._parse_pivots() 4020 if pivots: 4021 table.set("pivots", pivots) 4022 4023 return table 4024 4025 def _parse_table( 4026 self, 4027 schema: bool = False, 4028 joins: bool = False, 4029 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 4030 parse_bracket: bool = False, 4031 is_db_reference: bool = False, 4032 parse_partition: bool = False, 4033 ) -> t.Optional[exp.Expression]: 4034 lateral = self._parse_lateral() 4035 if lateral: 4036 return lateral 4037 4038 unnest = self._parse_unnest() 4039 if unnest: 4040 return unnest 4041 4042 values = self._parse_derived_table_values() 4043 if values: 4044 return values 4045 4046 subquery = self._parse_select(table=True) 4047 if subquery: 4048 if not subquery.args.get("pivots"): 4049 subquery.set("pivots", self._parse_pivots()) 4050 return subquery 4051 4052 bracket = parse_bracket and self._parse_bracket(None) 4053 bracket = self.expression(exp.Table, this=bracket) if bracket else None 4054 4055 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 4056 self._parse_table 4057 ) 4058 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 4059 4060 only = self._match(TokenType.ONLY) 4061 4062 this = t.cast( 4063 exp.Expression, 4064 bracket 4065 or rows_from 4066 or self._parse_bracket( 4067 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 4068 ), 4069 ) 4070 4071 if only: 4072 this.set("only", only) 4073 4074 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 4075 self._match_text_seq("*") 4076 4077 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4078 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4079 this.set("partition", self._parse_partition()) 4080 4081 if schema: 4082 return self._parse_schema(this=this) 4083 4084 version = self._parse_version() 4085 4086 if version: 4087 this.set("version", version) 4088 4089 if self.dialect.ALIAS_POST_TABLESAMPLE: 4090 this.set("sample", self._parse_table_sample()) 4091 4092 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4093 if alias: 4094 this.set("alias", alias) 4095 4096 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4097 return self.expression( 4098 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 4099 ) 4100 4101 this.set("hints", self._parse_table_hints()) 4102 4103 if not this.args.get("pivots"): 4104 this.set("pivots", self._parse_pivots()) 4105 4106 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4107 this.set("sample", self._parse_table_sample()) 4108 4109 if joins: 4110 for join in self._parse_joins(): 4111 this.append("joins", join) 4112 4113 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4114 this.set("ordinality", True) 4115 this.set("alias", self._parse_table_alias()) 4116 4117 return this 4118 4119 def _parse_version(self) -> t.Optional[exp.Version]: 4120 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4121 this = "TIMESTAMP" 4122 elif self._match(TokenType.VERSION_SNAPSHOT): 4123 this = "VERSION" 4124 else: 4125 return None 4126 4127 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4128 kind = self._prev.text.upper() 4129 start = self._parse_bitwise() 4130 self._match_texts(("TO", "AND")) 4131 end = self._parse_bitwise() 4132 expression: t.Optional[exp.Expression] = self.expression( 4133 exp.Tuple, expressions=[start, end] 4134 ) 4135 elif self._match_text_seq("CONTAINED", "IN"): 4136 kind = "CONTAINED IN" 4137 expression = self.expression( 4138 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4139 ) 4140 elif self._match(TokenType.ALL): 4141 kind = "ALL" 4142 expression = None 4143 else: 4144 self._match_text_seq("AS", "OF") 4145 kind = "AS OF" 4146 expression = self._parse_type() 4147 4148 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4149 4150 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4151 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4152 index = self._index 4153 historical_data = None 4154 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4155 this = self._prev.text.upper() 4156 kind = ( 4157 self._match(TokenType.L_PAREN) 4158 and self._match_texts(self.HISTORICAL_DATA_KIND) 4159 and self._prev.text.upper() 4160 ) 4161 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4162 4163 if expression: 4164 self._match_r_paren() 4165 historical_data = self.expression( 4166 exp.HistoricalData, this=this, kind=kind, expression=expression 4167 ) 4168 else: 4169 self._retreat(index) 4170 4171 return historical_data 4172 4173 def _parse_changes(self) -> t.Optional[exp.Changes]: 4174 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4175 return None 4176 4177 information = self._parse_var(any_token=True) 4178 self._match_r_paren() 4179 4180 return self.expression( 4181 exp.Changes, 4182 information=information, 4183 at_before=self._parse_historical_data(), 4184 end=self._parse_historical_data(), 4185 ) 4186 4187 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4188 if not self._match(TokenType.UNNEST): 4189 return None 4190 4191 expressions = self._parse_wrapped_csv(self._parse_equality) 4192 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4193 4194 alias = self._parse_table_alias() if with_alias else None 4195 4196 if alias: 4197 if self.dialect.UNNEST_COLUMN_ONLY: 4198 if alias.args.get("columns"): 4199 self.raise_error("Unexpected extra column alias in unnest.") 4200 4201 alias.set("columns", [alias.this]) 4202 alias.set("this", None) 4203 4204 columns = alias.args.get("columns") or [] 4205 if offset and len(expressions) < len(columns): 4206 offset = columns.pop() 4207 4208 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4209 self._match(TokenType.ALIAS) 4210 offset = self._parse_id_var( 4211 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4212 ) or exp.to_identifier("offset") 4213 4214 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4215 4216 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4217 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4218 if not is_derived and not ( 4219 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4220 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4221 ): 4222 return None 4223 4224 expressions = self._parse_csv(self._parse_value) 4225 alias = self._parse_table_alias() 4226 4227 if is_derived: 4228 self._match_r_paren() 4229 4230 return self.expression( 4231 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4232 ) 4233 4234 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4235 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4236 as_modifier and self._match_text_seq("USING", "SAMPLE") 4237 ): 4238 return None 4239 4240 bucket_numerator = None 4241 bucket_denominator = None 4242 bucket_field = None 4243 percent = None 4244 size = None 4245 seed = None 4246 4247 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4248 matched_l_paren = self._match(TokenType.L_PAREN) 4249 4250 if self.TABLESAMPLE_CSV: 4251 num = None 4252 expressions = self._parse_csv(self._parse_primary) 4253 else: 4254 expressions = None 4255 num = ( 4256 self._parse_factor() 4257 if self._match(TokenType.NUMBER, advance=False) 4258 else self._parse_primary() or self._parse_placeholder() 4259 ) 4260 4261 if self._match_text_seq("BUCKET"): 4262 bucket_numerator = self._parse_number() 4263 self._match_text_seq("OUT", "OF") 4264 bucket_denominator = bucket_denominator = self._parse_number() 4265 self._match(TokenType.ON) 4266 bucket_field = self._parse_field() 4267 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4268 percent = num 4269 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4270 size = num 4271 else: 4272 percent = num 4273 4274 if matched_l_paren: 4275 self._match_r_paren() 4276 4277 if self._match(TokenType.L_PAREN): 4278 method = self._parse_var(upper=True) 4279 seed = self._match(TokenType.COMMA) and self._parse_number() 4280 self._match_r_paren() 4281 elif self._match_texts(("SEED", "REPEATABLE")): 4282 seed = self._parse_wrapped(self._parse_number) 4283 4284 if not method and self.DEFAULT_SAMPLING_METHOD: 4285 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4286 4287 return self.expression( 4288 exp.TableSample, 4289 expressions=expressions, 4290 method=method, 4291 bucket_numerator=bucket_numerator, 4292 bucket_denominator=bucket_denominator, 4293 bucket_field=bucket_field, 4294 percent=percent, 4295 size=size, 4296 seed=seed, 4297 ) 4298 4299 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4300 return list(iter(self._parse_pivot, None)) or None 4301 4302 def _parse_joins(self) -> t.Iterator[exp.Join]: 4303 return iter(self._parse_join, None) 4304 4305 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4306 if not self._match(TokenType.INTO): 4307 return None 4308 4309 return self.expression( 4310 exp.UnpivotColumns, 4311 this=self._match_text_seq("NAME") and self._parse_column(), 4312 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4313 ) 4314 4315 # https://duckdb.org/docs/sql/statements/pivot 4316 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4317 def _parse_on() -> t.Optional[exp.Expression]: 4318 this = self._parse_bitwise() 4319 4320 if self._match(TokenType.IN): 4321 # PIVOT ... ON col IN (row_val1, row_val2) 4322 return self._parse_in(this) 4323 if self._match(TokenType.ALIAS, advance=False): 4324 # UNPIVOT ... ON (col1, col2, col3) AS row_val 4325 return self._parse_alias(this) 4326 4327 return this 4328 4329 this = self._parse_table() 4330 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4331 into = self._parse_unpivot_columns() 4332 using = self._match(TokenType.USING) and self._parse_csv( 4333 lambda: self._parse_alias(self._parse_function()) 4334 ) 4335 group = self._parse_group() 4336 4337 return self.expression( 4338 exp.Pivot, 4339 this=this, 4340 expressions=expressions, 4341 using=using, 4342 group=group, 4343 unpivot=is_unpivot, 4344 into=into, 4345 ) 4346 4347 def _parse_pivot_in(self) -> exp.In: 4348 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4349 this = self._parse_select_or_expression() 4350 4351 self._match(TokenType.ALIAS) 4352 alias = self._parse_bitwise() 4353 if alias: 4354 if isinstance(alias, exp.Column) and not alias.db: 4355 alias = alias.this 4356 return self.expression(exp.PivotAlias, this=this, alias=alias) 4357 4358 return this 4359 4360 value = self._parse_column() 4361 4362 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4363 self.raise_error("Expecting IN (") 4364 4365 if self._match(TokenType.ANY): 4366 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4367 else: 4368 exprs = self._parse_csv(_parse_aliased_expression) 4369 4370 self._match_r_paren() 4371 return self.expression(exp.In, this=value, expressions=exprs) 4372 4373 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4374 index = self._index 4375 include_nulls = None 4376 4377 if self._match(TokenType.PIVOT): 4378 unpivot = False 4379 elif self._match(TokenType.UNPIVOT): 4380 unpivot = True 4381 4382 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4383 if self._match_text_seq("INCLUDE", "NULLS"): 4384 include_nulls = True 4385 elif self._match_text_seq("EXCLUDE", "NULLS"): 4386 include_nulls = False 4387 else: 4388 return None 4389 4390 expressions = [] 4391 4392 if not self._match(TokenType.L_PAREN): 4393 self._retreat(index) 4394 return None 4395 4396 if unpivot: 4397 expressions = self._parse_csv(self._parse_column) 4398 else: 4399 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4400 4401 if not expressions: 4402 self.raise_error("Failed to parse PIVOT's aggregation list") 4403 4404 if not self._match(TokenType.FOR): 4405 self.raise_error("Expecting FOR") 4406 4407 fields = [] 4408 while True: 4409 field = self._try_parse(self._parse_pivot_in) 4410 if not field: 4411 break 4412 fields.append(field) 4413 4414 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4415 self._parse_bitwise 4416 ) 4417 4418 group = self._parse_group() 4419 4420 self._match_r_paren() 4421 4422 pivot = self.expression( 4423 exp.Pivot, 4424 expressions=expressions, 4425 fields=fields, 4426 unpivot=unpivot, 4427 include_nulls=include_nulls, 4428 default_on_null=default_on_null, 4429 group=group, 4430 ) 4431 4432 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4433 pivot.set("alias", self._parse_table_alias()) 4434 4435 if not unpivot: 4436 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4437 4438 columns: t.List[exp.Expression] = [] 4439 all_fields = [] 4440 for pivot_field in pivot.fields: 4441 pivot_field_expressions = pivot_field.expressions 4442 4443 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 4444 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4445 continue 4446 4447 all_fields.append( 4448 [ 4449 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4450 for fld in pivot_field_expressions 4451 ] 4452 ) 4453 4454 if all_fields: 4455 if names: 4456 all_fields.append(names) 4457 4458 # Generate all possible combinations of the pivot columns 4459 # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4460 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4461 for fld_parts_tuple in itertools.product(*all_fields): 4462 fld_parts = list(fld_parts_tuple) 4463 4464 if names and self.PREFIXED_PIVOT_COLUMNS: 4465 # Move the "name" to the front of the list 4466 fld_parts.insert(0, fld_parts.pop(-1)) 4467 4468 columns.append(exp.to_identifier("_".join(fld_parts))) 4469 4470 pivot.set("columns", columns) 4471 4472 return pivot 4473 4474 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4475 return [agg.alias for agg in aggregations if agg.alias] 4476 4477 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4478 if not skip_where_token and not self._match(TokenType.PREWHERE): 4479 return None 4480 4481 return self.expression( 4482 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4483 ) 4484 4485 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4486 if not skip_where_token and not self._match(TokenType.WHERE): 4487 return None 4488 4489 return self.expression( 4490 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4491 ) 4492 4493 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4494 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4495 return None 4496 4497 elements: t.Dict[str, t.Any] = defaultdict(list) 4498 4499 if self._match(TokenType.ALL): 4500 elements["all"] = True 4501 elif self._match(TokenType.DISTINCT): 4502 elements["all"] = False 4503 4504 while True: 4505 index = self._index 4506 4507 elements["expressions"].extend( 4508 self._parse_csv( 4509 lambda: None 4510 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4511 else self._parse_assignment() 4512 ) 4513 ) 4514 4515 before_with_index = self._index 4516 with_prefix = self._match(TokenType.WITH) 4517 4518 if self._match(TokenType.ROLLUP): 4519 elements["rollup"].append( 4520 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4521 ) 4522 elif self._match(TokenType.CUBE): 4523 elements["cube"].append( 4524 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4525 ) 4526 elif self._match(TokenType.GROUPING_SETS): 4527 elements["grouping_sets"].append( 4528 self.expression( 4529 exp.GroupingSets, 4530 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4531 ) 4532 ) 4533 elif self._match_text_seq("TOTALS"): 4534 elements["totals"] = True # type: ignore 4535 4536 if before_with_index <= self._index <= before_with_index + 1: 4537 self._retreat(before_with_index) 4538 break 4539 4540 if index == self._index: 4541 break 4542 4543 return self.expression(exp.Group, **elements) # type: ignore 4544 4545 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4546 return self.expression( 4547 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4548 ) 4549 4550 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4551 if self._match(TokenType.L_PAREN): 4552 grouping_set = self._parse_csv(self._parse_column) 4553 self._match_r_paren() 4554 return self.expression(exp.Tuple, expressions=grouping_set) 4555 4556 return self._parse_column() 4557 4558 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4559 if not skip_having_token and not self._match(TokenType.HAVING): 4560 return None 4561 return self.expression(exp.Having, this=self._parse_assignment()) 4562 4563 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4564 if not self._match(TokenType.QUALIFY): 4565 return None 4566 return self.expression(exp.Qualify, this=self._parse_assignment()) 4567 4568 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4569 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4570 exp.Prior, this=self._parse_bitwise() 4571 ) 4572 connect = self._parse_assignment() 4573 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4574 return connect 4575 4576 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4577 if skip_start_token: 4578 start = None 4579 elif self._match(TokenType.START_WITH): 4580 start = self._parse_assignment() 4581 else: 4582 return None 4583 4584 self._match(TokenType.CONNECT_BY) 4585 nocycle = self._match_text_seq("NOCYCLE") 4586 connect = self._parse_connect_with_prior() 4587 4588 if not start and self._match(TokenType.START_WITH): 4589 start = self._parse_assignment() 4590 4591 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4592 4593 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4594 this = self._parse_id_var(any_token=True) 4595 if self._match(TokenType.ALIAS): 4596 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4597 return this 4598 4599 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4600 if self._match_text_seq("INTERPOLATE"): 4601 return self._parse_wrapped_csv(self._parse_name_as_expression) 4602 return None 4603 4604 def _parse_order( 4605 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4606 ) -> t.Optional[exp.Expression]: 4607 siblings = None 4608 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4609 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4610 return this 4611 4612 siblings = True 4613 4614 return self.expression( 4615 exp.Order, 4616 this=this, 4617 expressions=self._parse_csv(self._parse_ordered), 4618 siblings=siblings, 4619 ) 4620 4621 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4622 if not self._match(token): 4623 return None 4624 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4625 4626 def _parse_ordered( 4627 self, parse_method: t.Optional[t.Callable] = None 4628 ) -> t.Optional[exp.Ordered]: 4629 this = parse_method() if parse_method else self._parse_assignment() 4630 if not this: 4631 return None 4632 4633 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4634 this = exp.var("ALL") 4635 4636 asc = self._match(TokenType.ASC) 4637 desc = self._match(TokenType.DESC) or (asc and False) 4638 4639 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4640 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4641 4642 nulls_first = is_nulls_first or False 4643 explicitly_null_ordered = is_nulls_first or is_nulls_last 4644 4645 if ( 4646 not explicitly_null_ordered 4647 and ( 4648 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4649 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4650 ) 4651 and self.dialect.NULL_ORDERING != "nulls_are_last" 4652 ): 4653 nulls_first = True 4654 4655 if self._match_text_seq("WITH", "FILL"): 4656 with_fill = self.expression( 4657 exp.WithFill, 4658 **{ # type: ignore 4659 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4660 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4661 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4662 "interpolate": self._parse_interpolate(), 4663 }, 4664 ) 4665 else: 4666 with_fill = None 4667 4668 return self.expression( 4669 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4670 ) 4671 4672 def _parse_limit_options(self) -> exp.LimitOptions: 4673 percent = self._match(TokenType.PERCENT) 4674 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4675 self._match_text_seq("ONLY") 4676 with_ties = self._match_text_seq("WITH", "TIES") 4677 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4678 4679 def _parse_limit( 4680 self, 4681 this: t.Optional[exp.Expression] = None, 4682 top: bool = False, 4683 skip_limit_token: bool = False, 4684 ) -> t.Optional[exp.Expression]: 4685 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4686 comments = self._prev_comments 4687 if top: 4688 limit_paren = self._match(TokenType.L_PAREN) 4689 expression = self._parse_term() if limit_paren else self._parse_number() 4690 4691 if limit_paren: 4692 self._match_r_paren() 4693 4694 limit_options = self._parse_limit_options() 4695 else: 4696 limit_options = None 4697 expression = self._parse_term() 4698 4699 if self._match(TokenType.COMMA): 4700 offset = expression 4701 expression = self._parse_term() 4702 else: 4703 offset = None 4704 4705 limit_exp = self.expression( 4706 exp.Limit, 4707 this=this, 4708 expression=expression, 4709 offset=offset, 4710 comments=comments, 4711 limit_options=limit_options, 4712 expressions=self._parse_limit_by(), 4713 ) 4714 4715 return limit_exp 4716 4717 if self._match(TokenType.FETCH): 4718 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4719 direction = self._prev.text.upper() if direction else "FIRST" 4720 4721 count = self._parse_field(tokens=self.FETCH_TOKENS) 4722 4723 return self.expression( 4724 exp.Fetch, 4725 direction=direction, 4726 count=count, 4727 limit_options=self._parse_limit_options(), 4728 ) 4729 4730 return this 4731 4732 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4733 if not self._match(TokenType.OFFSET): 4734 return this 4735 4736 count = self._parse_term() 4737 self._match_set((TokenType.ROW, TokenType.ROWS)) 4738 4739 return self.expression( 4740 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4741 ) 4742 4743 def _can_parse_limit_or_offset(self) -> bool: 4744 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4745 return False 4746 4747 index = self._index 4748 result = bool( 4749 self._try_parse(self._parse_limit, retreat=True) 4750 or self._try_parse(self._parse_offset, retreat=True) 4751 ) 4752 self._retreat(index) 4753 return result 4754 4755 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4756 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4757 4758 def _parse_locks(self) -> t.List[exp.Lock]: 4759 locks = [] 4760 while True: 4761 if self._match_text_seq("FOR", "UPDATE"): 4762 update = True 4763 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4764 "LOCK", "IN", "SHARE", "MODE" 4765 ): 4766 update = False 4767 else: 4768 break 4769 4770 expressions = None 4771 if self._match_text_seq("OF"): 4772 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4773 4774 wait: t.Optional[bool | exp.Expression] = None 4775 if self._match_text_seq("NOWAIT"): 4776 wait = True 4777 elif self._match_text_seq("WAIT"): 4778 wait = self._parse_primary() 4779 elif self._match_text_seq("SKIP", "LOCKED"): 4780 wait = False 4781 4782 locks.append( 4783 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4784 ) 4785 4786 return locks 4787 4788 def parse_set_operation(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4789 start = self._index 4790 _, side_token, kind_token = self._parse_join_parts() 4791 4792 side = side_token.text if side_token else None 4793 kind = kind_token.text if kind_token else None 4794 4795 if not self._match_set(self.SET_OPERATIONS): 4796 self._retreat(start) 4797 return None 4798 4799 token_type = self._prev.token_type 4800 4801 if token_type == TokenType.UNION: 4802 operation: t.Type[exp.SetOperation] = exp.Union 4803 elif token_type == TokenType.EXCEPT: 4804 operation = exp.Except 4805 else: 4806 operation = exp.Intersect 4807 4808 comments = self._prev.comments 4809 4810 if self._match(TokenType.DISTINCT): 4811 distinct: t.Optional[bool] = True 4812 elif self._match(TokenType.ALL): 4813 distinct = False 4814 else: 4815 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4816 if distinct is None: 4817 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4818 4819 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4820 "STRICT", "CORRESPONDING" 4821 ) 4822 if self._match_text_seq("CORRESPONDING"): 4823 by_name = True 4824 if not side and not kind: 4825 kind = "INNER" 4826 4827 on_column_list = None 4828 if by_name and self._match_texts(("ON", "BY")): 4829 on_column_list = self._parse_wrapped_csv(self._parse_column) 4830 4831 expression = self._parse_select(nested=True, parse_set_operation=False) 4832 4833 return self.expression( 4834 operation, 4835 comments=comments, 4836 this=this, 4837 distinct=distinct, 4838 by_name=by_name, 4839 expression=expression, 4840 side=side, 4841 kind=kind, 4842 on=on_column_list, 4843 ) 4844 4845 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4846 while this: 4847 setop = self.parse_set_operation(this) 4848 if not setop: 4849 break 4850 this = setop 4851 4852 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4853 expression = this.expression 4854 4855 if expression: 4856 for arg in self.SET_OP_MODIFIERS: 4857 expr = expression.args.get(arg) 4858 if expr: 4859 this.set(arg, expr.pop()) 4860 4861 return this 4862 4863 def _parse_expression(self) -> t.Optional[exp.Expression]: 4864 return self._parse_alias(self._parse_assignment()) 4865 4866 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4867 this = self._parse_disjunction() 4868 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4869 # This allows us to parse <non-identifier token> := <expr> 4870 this = exp.column( 4871 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4872 ) 4873 4874 while self._match_set(self.ASSIGNMENT): 4875 if isinstance(this, exp.Column) and len(this.parts) == 1: 4876 this = this.this 4877 4878 this = self.expression( 4879 self.ASSIGNMENT[self._prev.token_type], 4880 this=this, 4881 comments=self._prev_comments, 4882 expression=self._parse_assignment(), 4883 ) 4884 4885 return this 4886 4887 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4888 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4889 4890 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4891 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4892 4893 def _parse_equality(self) -> t.Optional[exp.Expression]: 4894 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4895 4896 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4897 return self._parse_tokens(self._parse_range, self.COMPARISON) 4898 4899 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4900 this = this or self._parse_bitwise() 4901 negate = self._match(TokenType.NOT) 4902 4903 if self._match_set(self.RANGE_PARSERS): 4904 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4905 if not expression: 4906 return this 4907 4908 this = expression 4909 elif self._match(TokenType.ISNULL): 4910 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4911 4912 # Postgres supports ISNULL and NOTNULL for conditions. 4913 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4914 if self._match(TokenType.NOTNULL): 4915 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4916 this = self.expression(exp.Not, this=this) 4917 4918 if negate: 4919 this = self._negate_range(this) 4920 4921 if self._match(TokenType.IS): 4922 this = self._parse_is(this) 4923 4924 return this 4925 4926 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4927 if not this: 4928 return this 4929 4930 return self.expression(exp.Not, this=this) 4931 4932 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4933 index = self._index - 1 4934 negate = self._match(TokenType.NOT) 4935 4936 if self._match_text_seq("DISTINCT", "FROM"): 4937 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4938 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4939 4940 if self._match(TokenType.JSON): 4941 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4942 4943 if self._match_text_seq("WITH"): 4944 _with = True 4945 elif self._match_text_seq("WITHOUT"): 4946 _with = False 4947 else: 4948 _with = None 4949 4950 unique = self._match(TokenType.UNIQUE) 4951 self._match_text_seq("KEYS") 4952 expression: t.Optional[exp.Expression] = self.expression( 4953 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4954 ) 4955 else: 4956 expression = self._parse_primary() or self._parse_null() 4957 if not expression: 4958 self._retreat(index) 4959 return None 4960 4961 this = self.expression(exp.Is, this=this, expression=expression) 4962 return self.expression(exp.Not, this=this) if negate else this 4963 4964 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4965 unnest = self._parse_unnest(with_alias=False) 4966 if unnest: 4967 this = self.expression(exp.In, this=this, unnest=unnest) 4968 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4969 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4970 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4971 4972 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4973 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4974 else: 4975 this = self.expression(exp.In, this=this, expressions=expressions) 4976 4977 if matched_l_paren: 4978 self._match_r_paren(this) 4979 elif not self._match(TokenType.R_BRACKET, expression=this): 4980 self.raise_error("Expecting ]") 4981 else: 4982 this = self.expression(exp.In, this=this, field=self._parse_column()) 4983 4984 return this 4985 4986 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4987 low = self._parse_bitwise() 4988 self._match(TokenType.AND) 4989 high = self._parse_bitwise() 4990 return self.expression(exp.Between, this=this, low=low, high=high) 4991 4992 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4993 if not self._match(TokenType.ESCAPE): 4994 return this 4995 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4996 4997 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4998 index = self._index 4999 5000 if not self._match(TokenType.INTERVAL) and match_interval: 5001 return None 5002 5003 if self._match(TokenType.STRING, advance=False): 5004 this = self._parse_primary() 5005 else: 5006 this = self._parse_term() 5007 5008 if not this or ( 5009 isinstance(this, exp.Column) 5010 and not this.table 5011 and not this.this.quoted 5012 and this.name.upper() == "IS" 5013 ): 5014 self._retreat(index) 5015 return None 5016 5017 unit = self._parse_function() or ( 5018 not self._match(TokenType.ALIAS, advance=False) 5019 and self._parse_var(any_token=True, upper=True) 5020 ) 5021 5022 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 5023 # each INTERVAL expression into this canonical form so it's easy to transpile 5024 if this and this.is_number: 5025 this = exp.Literal.string(this.to_py()) 5026 elif this and this.is_string: 5027 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5028 if parts and unit: 5029 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5030 unit = None 5031 self._retreat(self._index - 1) 5032 5033 if len(parts) == 1: 5034 this = exp.Literal.string(parts[0][0]) 5035 unit = self.expression(exp.Var, this=parts[0][1].upper()) 5036 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5037 unit = self.expression( 5038 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 5039 ) 5040 5041 interval = self.expression(exp.Interval, this=this, unit=unit) 5042 5043 index = self._index 5044 self._match(TokenType.PLUS) 5045 5046 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 5047 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 5048 return self.expression( 5049 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 5050 ) 5051 5052 self._retreat(index) 5053 return interval 5054 5055 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 5056 this = self._parse_term() 5057 5058 while True: 5059 if self._match_set(self.BITWISE): 5060 this = self.expression( 5061 self.BITWISE[self._prev.token_type], 5062 this=this, 5063 expression=self._parse_term(), 5064 ) 5065 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 5066 this = self.expression( 5067 exp.DPipe, 5068 this=this, 5069 expression=self._parse_term(), 5070 safe=not self.dialect.STRICT_STRING_CONCAT, 5071 ) 5072 elif self._match(TokenType.DQMARK): 5073 this = self.expression( 5074 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 5075 ) 5076 elif self._match_pair(TokenType.LT, TokenType.LT): 5077 this = self.expression( 5078 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 5079 ) 5080 elif self._match_pair(TokenType.GT, TokenType.GT): 5081 this = self.expression( 5082 exp.BitwiseRightShift, this=this, expression=self._parse_term() 5083 ) 5084 else: 5085 break 5086 5087 return this 5088 5089 def _parse_term(self) -> t.Optional[exp.Expression]: 5090 this = self._parse_factor() 5091 5092 while self._match_set(self.TERM): 5093 klass = self.TERM[self._prev.token_type] 5094 comments = self._prev_comments 5095 expression = self._parse_factor() 5096 5097 this = self.expression(klass, this=this, comments=comments, expression=expression) 5098 5099 if isinstance(this, exp.Collate): 5100 expr = this.expression 5101 5102 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5103 # fallback to Identifier / Var 5104 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5105 ident = expr.this 5106 if isinstance(ident, exp.Identifier): 5107 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5108 5109 return this 5110 5111 def _parse_factor(self) -> t.Optional[exp.Expression]: 5112 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5113 this = parse_method() 5114 5115 while self._match_set(self.FACTOR): 5116 klass = self.FACTOR[self._prev.token_type] 5117 comments = self._prev_comments 5118 expression = parse_method() 5119 5120 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5121 self._retreat(self._index - 1) 5122 return this 5123 5124 this = self.expression(klass, this=this, comments=comments, expression=expression) 5125 5126 if isinstance(this, exp.Div): 5127 this.args["typed"] = self.dialect.TYPED_DIVISION 5128 this.args["safe"] = self.dialect.SAFE_DIVISION 5129 5130 return this 5131 5132 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5133 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5134 5135 def _parse_unary(self) -> t.Optional[exp.Expression]: 5136 if self._match_set(self.UNARY_PARSERS): 5137 return self.UNARY_PARSERS[self._prev.token_type](self) 5138 return self._parse_at_time_zone(self._parse_type()) 5139 5140 def _parse_type( 5141 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5142 ) -> t.Optional[exp.Expression]: 5143 interval = parse_interval and self._parse_interval() 5144 if interval: 5145 return interval 5146 5147 index = self._index 5148 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5149 5150 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 5151 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5152 if isinstance(data_type, exp.Cast): 5153 # This constructor can contain ops directly after it, for instance struct unnesting: 5154 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).* 5155 return self._parse_column_ops(data_type) 5156 5157 if data_type: 5158 index2 = self._index 5159 this = self._parse_primary() 5160 5161 if isinstance(this, exp.Literal): 5162 this = self._parse_column_ops(this) 5163 5164 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5165 if parser: 5166 return parser(self, this, data_type) 5167 5168 return self.expression(exp.Cast, this=this, to=data_type) 5169 5170 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5171 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5172 # 5173 # If the index difference here is greater than 1, that means the parser itself must have 5174 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5175 # 5176 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5177 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5178 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5179 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 5180 # 5181 # In these cases, we don't really want to return the converted type, but instead retreat 5182 # and try to parse a Column or Identifier in the section below. 5183 if data_type.expressions and index2 - index > 1: 5184 self._retreat(index2) 5185 return self._parse_column_ops(data_type) 5186 5187 self._retreat(index) 5188 5189 if fallback_to_identifier: 5190 return self._parse_id_var() 5191 5192 this = self._parse_column() 5193 return this and self._parse_column_ops(this) 5194 5195 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5196 this = self._parse_type() 5197 if not this: 5198 return None 5199 5200 if isinstance(this, exp.Column) and not this.table: 5201 this = exp.var(this.name.upper()) 5202 5203 return self.expression( 5204 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5205 ) 5206 5207 def _parse_types( 5208 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5209 ) -> t.Optional[exp.Expression]: 5210 index = self._index 5211 5212 this: t.Optional[exp.Expression] = None 5213 prefix = self._match_text_seq("SYSUDTLIB", ".") 5214 5215 if not self._match_set(self.TYPE_TOKENS): 5216 identifier = allow_identifiers and self._parse_id_var( 5217 any_token=False, tokens=(TokenType.VAR,) 5218 ) 5219 if isinstance(identifier, exp.Identifier): 5220 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 5221 5222 if len(tokens) != 1: 5223 self.raise_error("Unexpected identifier", self._prev) 5224 5225 if tokens[0].token_type in self.TYPE_TOKENS: 5226 self._prev = tokens[0] 5227 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5228 type_name = identifier.name 5229 5230 while self._match(TokenType.DOT): 5231 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5232 5233 this = exp.DataType.build(type_name, udt=True) 5234 else: 5235 self._retreat(self._index - 1) 5236 return None 5237 else: 5238 return None 5239 5240 type_token = self._prev.token_type 5241 5242 if type_token == TokenType.PSEUDO_TYPE: 5243 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5244 5245 if type_token == TokenType.OBJECT_IDENTIFIER: 5246 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5247 5248 # https://materialize.com/docs/sql/types/map/ 5249 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5250 key_type = self._parse_types( 5251 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5252 ) 5253 if not self._match(TokenType.FARROW): 5254 self._retreat(index) 5255 return None 5256 5257 value_type = self._parse_types( 5258 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5259 ) 5260 if not self._match(TokenType.R_BRACKET): 5261 self._retreat(index) 5262 return None 5263 5264 return exp.DataType( 5265 this=exp.DataType.Type.MAP, 5266 expressions=[key_type, value_type], 5267 nested=True, 5268 prefix=prefix, 5269 ) 5270 5271 nested = type_token in self.NESTED_TYPE_TOKENS 5272 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5273 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5274 expressions = None 5275 maybe_func = False 5276 5277 if self._match(TokenType.L_PAREN): 5278 if is_struct: 5279 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5280 elif nested: 5281 expressions = self._parse_csv( 5282 lambda: self._parse_types( 5283 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5284 ) 5285 ) 5286 if type_token == TokenType.NULLABLE and len(expressions) == 1: 5287 this = expressions[0] 5288 this.set("nullable", True) 5289 self._match_r_paren() 5290 return this 5291 elif type_token in self.ENUM_TYPE_TOKENS: 5292 expressions = self._parse_csv(self._parse_equality) 5293 elif is_aggregate: 5294 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5295 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5296 ) 5297 if not func_or_ident: 5298 return None 5299 expressions = [func_or_ident] 5300 if self._match(TokenType.COMMA): 5301 expressions.extend( 5302 self._parse_csv( 5303 lambda: self._parse_types( 5304 check_func=check_func, 5305 schema=schema, 5306 allow_identifiers=allow_identifiers, 5307 ) 5308 ) 5309 ) 5310 else: 5311 expressions = self._parse_csv(self._parse_type_size) 5312 5313 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5314 if type_token == TokenType.VECTOR and len(expressions) == 2: 5315 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5316 5317 if not expressions or not self._match(TokenType.R_PAREN): 5318 self._retreat(index) 5319 return None 5320 5321 maybe_func = True 5322 5323 values: t.Optional[t.List[exp.Expression]] = None 5324 5325 if nested and self._match(TokenType.LT): 5326 if is_struct: 5327 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5328 else: 5329 expressions = self._parse_csv( 5330 lambda: self._parse_types( 5331 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5332 ) 5333 ) 5334 5335 if not self._match(TokenType.GT): 5336 self.raise_error("Expecting >") 5337 5338 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5339 values = self._parse_csv(self._parse_assignment) 5340 if not values and is_struct: 5341 values = None 5342 self._retreat(self._index - 1) 5343 else: 5344 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5345 5346 if type_token in self.TIMESTAMPS: 5347 if self._match_text_seq("WITH", "TIME", "ZONE"): 5348 maybe_func = False 5349 tz_type = ( 5350 exp.DataType.Type.TIMETZ 5351 if type_token in self.TIMES 5352 else exp.DataType.Type.TIMESTAMPTZ 5353 ) 5354 this = exp.DataType(this=tz_type, expressions=expressions) 5355 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5356 maybe_func = False 5357 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5358 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5359 maybe_func = False 5360 elif type_token == TokenType.INTERVAL: 5361 unit = self._parse_var(upper=True) 5362 if unit: 5363 if self._match_text_seq("TO"): 5364 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5365 5366 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5367 else: 5368 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5369 elif type_token == TokenType.VOID: 5370 this = exp.DataType(this=exp.DataType.Type.NULL) 5371 5372 if maybe_func and check_func: 5373 index2 = self._index 5374 peek = self._parse_string() 5375 5376 if not peek: 5377 self._retreat(index) 5378 return None 5379 5380 self._retreat(index2) 5381 5382 if not this: 5383 if self._match_text_seq("UNSIGNED"): 5384 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5385 if not unsigned_type_token: 5386 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5387 5388 type_token = unsigned_type_token or type_token 5389 5390 this = exp.DataType( 5391 this=exp.DataType.Type[type_token.value], 5392 expressions=expressions, 5393 nested=nested, 5394 prefix=prefix, 5395 ) 5396 5397 # Empty arrays/structs are allowed 5398 if values is not None: 5399 cls = exp.Struct if is_struct else exp.Array 5400 this = exp.cast(cls(expressions=values), this, copy=False) 5401 5402 elif expressions: 5403 this.set("expressions", expressions) 5404 5405 # https://materialize.com/docs/sql/types/list/#type-name 5406 while self._match(TokenType.LIST): 5407 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5408 5409 index = self._index 5410 5411 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5412 matched_array = self._match(TokenType.ARRAY) 5413 5414 while self._curr: 5415 datatype_token = self._prev.token_type 5416 matched_l_bracket = self._match(TokenType.L_BRACKET) 5417 5418 if (not matched_l_bracket and not matched_array) or ( 5419 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5420 ): 5421 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5422 # not to be confused with the fixed size array parsing 5423 break 5424 5425 matched_array = False 5426 values = self._parse_csv(self._parse_assignment) or None 5427 if ( 5428 values 5429 and not schema 5430 and ( 5431 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5432 ) 5433 ): 5434 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5435 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5436 self._retreat(index) 5437 break 5438 5439 this = exp.DataType( 5440 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5441 ) 5442 self._match(TokenType.R_BRACKET) 5443 5444 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5445 converter = self.TYPE_CONVERTERS.get(this.this) 5446 if converter: 5447 this = converter(t.cast(exp.DataType, this)) 5448 5449 return this 5450 5451 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5452 index = self._index 5453 5454 if ( 5455 self._curr 5456 and self._next 5457 and self._curr.token_type in self.TYPE_TOKENS 5458 and self._next.token_type in self.TYPE_TOKENS 5459 ): 5460 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5461 # type token. Without this, the list will be parsed as a type and we'll eventually crash 5462 this = self._parse_id_var() 5463 else: 5464 this = ( 5465 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5466 or self._parse_id_var() 5467 ) 5468 5469 self._match(TokenType.COLON) 5470 5471 if ( 5472 type_required 5473 and not isinstance(this, exp.DataType) 5474 and not self._match_set(self.TYPE_TOKENS, advance=False) 5475 ): 5476 self._retreat(index) 5477 return self._parse_types() 5478 5479 return self._parse_column_def(this) 5480 5481 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5482 if not self._match_text_seq("AT", "TIME", "ZONE"): 5483 return this 5484 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5485 5486 def _parse_column(self) -> t.Optional[exp.Expression]: 5487 this = self._parse_column_reference() 5488 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5489 5490 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5491 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5492 5493 return column 5494 5495 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5496 this = self._parse_field() 5497 if ( 5498 not this 5499 and self._match(TokenType.VALUES, advance=False) 5500 and self.VALUES_FOLLOWED_BY_PAREN 5501 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5502 ): 5503 this = self._parse_id_var() 5504 5505 if isinstance(this, exp.Identifier): 5506 # We bubble up comments from the Identifier to the Column 5507 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5508 5509 return this 5510 5511 def _parse_colon_as_variant_extract( 5512 self, this: t.Optional[exp.Expression] 5513 ) -> t.Optional[exp.Expression]: 5514 casts = [] 5515 json_path = [] 5516 escape = None 5517 5518 while self._match(TokenType.COLON): 5519 start_index = self._index 5520 5521 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5522 path = self._parse_column_ops( 5523 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5524 ) 5525 5526 # The cast :: operator has a lower precedence than the extraction operator :, so 5527 # we rearrange the AST appropriately to avoid casting the JSON path 5528 while isinstance(path, exp.Cast): 5529 casts.append(path.to) 5530 path = path.this 5531 5532 if casts: 5533 dcolon_offset = next( 5534 i 5535 for i, t in enumerate(self._tokens[start_index:]) 5536 if t.token_type == TokenType.DCOLON 5537 ) 5538 end_token = self._tokens[start_index + dcolon_offset - 1] 5539 else: 5540 end_token = self._prev 5541 5542 if path: 5543 # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as 5544 # it'll roundtrip to a string literal in GET_PATH 5545 if isinstance(path, exp.Identifier) and path.quoted: 5546 escape = True 5547 5548 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5549 5550 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5551 # Databricks transforms it back to the colon/dot notation 5552 if json_path: 5553 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5554 5555 if json_path_expr: 5556 json_path_expr.set("escape", escape) 5557 5558 this = self.expression( 5559 exp.JSONExtract, 5560 this=this, 5561 expression=json_path_expr, 5562 variant_extract=True, 5563 ) 5564 5565 while casts: 5566 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5567 5568 return this 5569 5570 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5571 return self._parse_types() 5572 5573 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5574 this = self._parse_bracket(this) 5575 5576 while self._match_set(self.COLUMN_OPERATORS): 5577 op_token = self._prev.token_type 5578 op = self.COLUMN_OPERATORS.get(op_token) 5579 5580 if op_token in (TokenType.DCOLON, TokenType.DOTCOLON): 5581 field = self._parse_dcolon() 5582 if not field: 5583 self.raise_error("Expected type") 5584 elif op and self._curr: 5585 field = self._parse_column_reference() or self._parse_bracket() 5586 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5587 field = self._parse_column_ops(field) 5588 else: 5589 field = self._parse_field(any_token=True, anonymous_func=True) 5590 5591 if isinstance(field, (exp.Func, exp.Window)) and this: 5592 # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) etc 5593 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5594 this = exp.replace_tree( 5595 this, 5596 lambda n: ( 5597 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5598 if n.table 5599 else n.this 5600 ) 5601 if isinstance(n, exp.Column) 5602 else n, 5603 ) 5604 5605 if op: 5606 this = op(self, this, field) 5607 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5608 this = self.expression( 5609 exp.Column, 5610 comments=this.comments, 5611 this=field, 5612 table=this.this, 5613 db=this.args.get("table"), 5614 catalog=this.args.get("db"), 5615 ) 5616 elif isinstance(field, exp.Window): 5617 # Move the exp.Dot's to the window's function 5618 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5619 field.set("this", window_func) 5620 this = field 5621 else: 5622 this = self.expression(exp.Dot, this=this, expression=field) 5623 5624 if field and field.comments: 5625 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5626 5627 this = self._parse_bracket(this) 5628 5629 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5630 5631 def _parse_primary(self) -> t.Optional[exp.Expression]: 5632 if self._match_set(self.PRIMARY_PARSERS): 5633 token_type = self._prev.token_type 5634 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5635 5636 if token_type == TokenType.STRING: 5637 expressions = [primary] 5638 while self._match(TokenType.STRING): 5639 expressions.append(exp.Literal.string(self._prev.text)) 5640 5641 if len(expressions) > 1: 5642 return self.expression(exp.Concat, expressions=expressions) 5643 5644 return primary 5645 5646 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5647 return exp.Literal.number(f"0.{self._prev.text}") 5648 5649 if self._match(TokenType.L_PAREN): 5650 comments = self._prev_comments 5651 query = self._parse_select() 5652 5653 if query: 5654 expressions = [query] 5655 else: 5656 expressions = self._parse_expressions() 5657 5658 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5659 5660 if not this and self._match(TokenType.R_PAREN, advance=False): 5661 this = self.expression(exp.Tuple) 5662 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5663 this = self._parse_subquery(this=this, parse_alias=False) 5664 elif isinstance(this, exp.Subquery): 5665 this = self._parse_subquery( 5666 this=self._parse_set_operations(this), parse_alias=False 5667 ) 5668 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5669 this = self.expression(exp.Tuple, expressions=expressions) 5670 else: 5671 this = self.expression(exp.Paren, this=this) 5672 5673 if this: 5674 this.add_comments(comments) 5675 5676 self._match_r_paren(expression=this) 5677 return this 5678 5679 return None 5680 5681 def _parse_field( 5682 self, 5683 any_token: bool = False, 5684 tokens: t.Optional[t.Collection[TokenType]] = None, 5685 anonymous_func: bool = False, 5686 ) -> t.Optional[exp.Expression]: 5687 if anonymous_func: 5688 field = ( 5689 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5690 or self._parse_primary() 5691 ) 5692 else: 5693 field = self._parse_primary() or self._parse_function( 5694 anonymous=anonymous_func, any_token=any_token 5695 ) 5696 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5697 5698 def _parse_function( 5699 self, 5700 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5701 anonymous: bool = False, 5702 optional_parens: bool = True, 5703 any_token: bool = False, 5704 ) -> t.Optional[exp.Expression]: 5705 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5706 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5707 fn_syntax = False 5708 if ( 5709 self._match(TokenType.L_BRACE, advance=False) 5710 and self._next 5711 and self._next.text.upper() == "FN" 5712 ): 5713 self._advance(2) 5714 fn_syntax = True 5715 5716 func = self._parse_function_call( 5717 functions=functions, 5718 anonymous=anonymous, 5719 optional_parens=optional_parens, 5720 any_token=any_token, 5721 ) 5722 5723 if fn_syntax: 5724 self._match(TokenType.R_BRACE) 5725 5726 return func 5727 5728 def _parse_function_call( 5729 self, 5730 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5731 anonymous: bool = False, 5732 optional_parens: bool = True, 5733 any_token: bool = False, 5734 ) -> t.Optional[exp.Expression]: 5735 if not self._curr: 5736 return None 5737 5738 comments = self._curr.comments 5739 token = self._curr 5740 token_type = self._curr.token_type 5741 this = self._curr.text 5742 upper = this.upper() 5743 5744 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5745 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5746 self._advance() 5747 return self._parse_window(parser(self)) 5748 5749 if not self._next or self._next.token_type != TokenType.L_PAREN: 5750 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5751 self._advance() 5752 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5753 5754 return None 5755 5756 if any_token: 5757 if token_type in self.RESERVED_TOKENS: 5758 return None 5759 elif token_type not in self.FUNC_TOKENS: 5760 return None 5761 5762 self._advance(2) 5763 5764 parser = self.FUNCTION_PARSERS.get(upper) 5765 if parser and not anonymous: 5766 this = parser(self) 5767 else: 5768 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5769 5770 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5771 this = self.expression( 5772 subquery_predicate, comments=comments, this=self._parse_select() 5773 ) 5774 self._match_r_paren() 5775 return this 5776 5777 if functions is None: 5778 functions = self.FUNCTIONS 5779 5780 function = functions.get(upper) 5781 known_function = function and not anonymous 5782 5783 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5784 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5785 5786 post_func_comments = self._curr and self._curr.comments 5787 if known_function and post_func_comments: 5788 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5789 # call we'll construct it as exp.Anonymous, even if it's "known" 5790 if any( 5791 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5792 for comment in post_func_comments 5793 ): 5794 known_function = False 5795 5796 if alias and known_function: 5797 args = self._kv_to_prop_eq(args) 5798 5799 if known_function: 5800 func_builder = t.cast(t.Callable, function) 5801 5802 if "dialect" in func_builder.__code__.co_varnames: 5803 func = func_builder(args, dialect=self.dialect) 5804 else: 5805 func = func_builder(args) 5806 5807 func = self.validate_expression(func, args) 5808 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5809 func.meta["name"] = this 5810 5811 this = func 5812 else: 5813 if token_type == TokenType.IDENTIFIER: 5814 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5815 5816 this = self.expression(exp.Anonymous, this=this, expressions=args) 5817 this = this.update_positions(token) 5818 5819 if isinstance(this, exp.Expression): 5820 this.add_comments(comments) 5821 5822 self._match_r_paren(this) 5823 return self._parse_window(this) 5824 5825 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5826 return expression 5827 5828 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5829 transformed = [] 5830 5831 for index, e in enumerate(expressions): 5832 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5833 if isinstance(e, exp.Alias): 5834 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5835 5836 if not isinstance(e, exp.PropertyEQ): 5837 e = self.expression( 5838 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5839 ) 5840 5841 if isinstance(e.this, exp.Column): 5842 e.this.replace(e.this.this) 5843 else: 5844 e = self._to_prop_eq(e, index) 5845 5846 transformed.append(e) 5847 5848 return transformed 5849 5850 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5851 return self._parse_statement() 5852 5853 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5854 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5855 5856 def _parse_user_defined_function( 5857 self, kind: t.Optional[TokenType] = None 5858 ) -> t.Optional[exp.Expression]: 5859 this = self._parse_table_parts(schema=True) 5860 5861 if not self._match(TokenType.L_PAREN): 5862 return this 5863 5864 expressions = self._parse_csv(self._parse_function_parameter) 5865 self._match_r_paren() 5866 return self.expression( 5867 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5868 ) 5869 5870 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5871 literal = self._parse_primary() 5872 if literal: 5873 return self.expression(exp.Introducer, this=token.text, expression=literal) 5874 5875 return self._identifier_expression(token) 5876 5877 def _parse_session_parameter(self) -> exp.SessionParameter: 5878 kind = None 5879 this = self._parse_id_var() or self._parse_primary() 5880 5881 if this and self._match(TokenType.DOT): 5882 kind = this.name 5883 this = self._parse_var() or self._parse_primary() 5884 5885 return self.expression(exp.SessionParameter, this=this, kind=kind) 5886 5887 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5888 return self._parse_id_var() 5889 5890 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5891 index = self._index 5892 5893 if self._match(TokenType.L_PAREN): 5894 expressions = t.cast( 5895 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5896 ) 5897 5898 if not self._match(TokenType.R_PAREN): 5899 self._retreat(index) 5900 else: 5901 expressions = [self._parse_lambda_arg()] 5902 5903 if self._match_set(self.LAMBDAS): 5904 return self.LAMBDAS[self._prev.token_type](self, expressions) 5905 5906 self._retreat(index) 5907 5908 this: t.Optional[exp.Expression] 5909 5910 if self._match(TokenType.DISTINCT): 5911 this = self.expression( 5912 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5913 ) 5914 else: 5915 this = self._parse_select_or_expression(alias=alias) 5916 5917 return self._parse_limit( 5918 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5919 ) 5920 5921 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5922 index = self._index 5923 if not self._match(TokenType.L_PAREN): 5924 return this 5925 5926 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5927 # expr can be of both types 5928 if self._match_set(self.SELECT_START_TOKENS): 5929 self._retreat(index) 5930 return this 5931 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5932 self._match_r_paren() 5933 return self.expression(exp.Schema, this=this, expressions=args) 5934 5935 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5936 return self._parse_column_def(self._parse_field(any_token=True)) 5937 5938 def _parse_column_def( 5939 self, this: t.Optional[exp.Expression], computed_column: bool = True 5940 ) -> t.Optional[exp.Expression]: 5941 # column defs are not really columns, they're identifiers 5942 if isinstance(this, exp.Column): 5943 this = this.this 5944 5945 if not computed_column: 5946 self._match(TokenType.ALIAS) 5947 5948 kind = self._parse_types(schema=True) 5949 5950 if self._match_text_seq("FOR", "ORDINALITY"): 5951 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5952 5953 constraints: t.List[exp.Expression] = [] 5954 5955 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5956 ("ALIAS", "MATERIALIZED") 5957 ): 5958 persisted = self._prev.text.upper() == "MATERIALIZED" 5959 constraint_kind = exp.ComputedColumnConstraint( 5960 this=self._parse_assignment(), 5961 persisted=persisted or self._match_text_seq("PERSISTED"), 5962 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5963 ) 5964 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5965 elif ( 5966 kind 5967 and self._match(TokenType.ALIAS, advance=False) 5968 and ( 5969 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5970 or (self._next and self._next.token_type == TokenType.L_PAREN) 5971 ) 5972 ): 5973 self._advance() 5974 constraints.append( 5975 self.expression( 5976 exp.ColumnConstraint, 5977 kind=exp.ComputedColumnConstraint(this=self._parse_disjunction()), 5978 ) 5979 ) 5980 5981 while True: 5982 constraint = self._parse_column_constraint() 5983 if not constraint: 5984 break 5985 constraints.append(constraint) 5986 5987 if not kind and not constraints: 5988 return this 5989 5990 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5991 5992 def _parse_auto_increment( 5993 self, 5994 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5995 start = None 5996 increment = None 5997 5998 if self._match(TokenType.L_PAREN, advance=False): 5999 args = self._parse_wrapped_csv(self._parse_bitwise) 6000 start = seq_get(args, 0) 6001 increment = seq_get(args, 1) 6002 elif self._match_text_seq("START"): 6003 start = self._parse_bitwise() 6004 self._match_text_seq("INCREMENT") 6005 increment = self._parse_bitwise() 6006 6007 if start and increment: 6008 return exp.GeneratedAsIdentityColumnConstraint( 6009 start=start, increment=increment, this=False 6010 ) 6011 6012 return exp.AutoIncrementColumnConstraint() 6013 6014 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 6015 if not self._match_text_seq("REFRESH"): 6016 self._retreat(self._index - 1) 6017 return None 6018 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 6019 6020 def _parse_compress(self) -> exp.CompressColumnConstraint: 6021 if self._match(TokenType.L_PAREN, advance=False): 6022 return self.expression( 6023 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 6024 ) 6025 6026 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 6027 6028 def _parse_generated_as_identity( 6029 self, 6030 ) -> ( 6031 exp.GeneratedAsIdentityColumnConstraint 6032 | exp.ComputedColumnConstraint 6033 | exp.GeneratedAsRowColumnConstraint 6034 ): 6035 if self._match_text_seq("BY", "DEFAULT"): 6036 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 6037 this = self.expression( 6038 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 6039 ) 6040 else: 6041 self._match_text_seq("ALWAYS") 6042 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 6043 6044 self._match(TokenType.ALIAS) 6045 6046 if self._match_text_seq("ROW"): 6047 start = self._match_text_seq("START") 6048 if not start: 6049 self._match(TokenType.END) 6050 hidden = self._match_text_seq("HIDDEN") 6051 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 6052 6053 identity = self._match_text_seq("IDENTITY") 6054 6055 if self._match(TokenType.L_PAREN): 6056 if self._match(TokenType.START_WITH): 6057 this.set("start", self._parse_bitwise()) 6058 if self._match_text_seq("INCREMENT", "BY"): 6059 this.set("increment", self._parse_bitwise()) 6060 if self._match_text_seq("MINVALUE"): 6061 this.set("minvalue", self._parse_bitwise()) 6062 if self._match_text_seq("MAXVALUE"): 6063 this.set("maxvalue", self._parse_bitwise()) 6064 6065 if self._match_text_seq("CYCLE"): 6066 this.set("cycle", True) 6067 elif self._match_text_seq("NO", "CYCLE"): 6068 this.set("cycle", False) 6069 6070 if not identity: 6071 this.set("expression", self._parse_range()) 6072 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 6073 args = self._parse_csv(self._parse_bitwise) 6074 this.set("start", seq_get(args, 0)) 6075 this.set("increment", seq_get(args, 1)) 6076 6077 self._match_r_paren() 6078 6079 return this 6080 6081 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6082 self._match_text_seq("LENGTH") 6083 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6084 6085 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6086 if self._match_text_seq("NULL"): 6087 return self.expression(exp.NotNullColumnConstraint) 6088 if self._match_text_seq("CASESPECIFIC"): 6089 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6090 if self._match_text_seq("FOR", "REPLICATION"): 6091 return self.expression(exp.NotForReplicationColumnConstraint) 6092 6093 # Unconsume the `NOT` token 6094 self._retreat(self._index - 1) 6095 return None 6096 6097 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6098 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6099 6100 procedure_option_follows = ( 6101 self._match(TokenType.WITH, advance=False) 6102 and self._next 6103 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6104 ) 6105 6106 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6107 return self.expression( 6108 exp.ColumnConstraint, 6109 this=this, 6110 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6111 ) 6112 6113 return this 6114 6115 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6116 if not self._match(TokenType.CONSTRAINT): 6117 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6118 6119 return self.expression( 6120 exp.Constraint, 6121 this=self._parse_id_var(), 6122 expressions=self._parse_unnamed_constraints(), 6123 ) 6124 6125 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6126 constraints = [] 6127 while True: 6128 constraint = self._parse_unnamed_constraint() or self._parse_function() 6129 if not constraint: 6130 break 6131 constraints.append(constraint) 6132 6133 return constraints 6134 6135 def _parse_unnamed_constraint( 6136 self, constraints: t.Optional[t.Collection[str]] = None 6137 ) -> t.Optional[exp.Expression]: 6138 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6139 constraints or self.CONSTRAINT_PARSERS 6140 ): 6141 return None 6142 6143 constraint = self._prev.text.upper() 6144 if constraint not in self.CONSTRAINT_PARSERS: 6145 self.raise_error(f"No parser found for schema constraint {constraint}.") 6146 6147 return self.CONSTRAINT_PARSERS[constraint](self) 6148 6149 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6150 return self._parse_id_var(any_token=False) 6151 6152 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6153 self._match_text_seq("KEY") 6154 return self.expression( 6155 exp.UniqueColumnConstraint, 6156 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6157 this=self._parse_schema(self._parse_unique_key()), 6158 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6159 on_conflict=self._parse_on_conflict(), 6160 options=self._parse_key_constraint_options(), 6161 ) 6162 6163 def _parse_key_constraint_options(self) -> t.List[str]: 6164 options = [] 6165 while True: 6166 if not self._curr: 6167 break 6168 6169 if self._match(TokenType.ON): 6170 action = None 6171 on = self._advance_any() and self._prev.text 6172 6173 if self._match_text_seq("NO", "ACTION"): 6174 action = "NO ACTION" 6175 elif self._match_text_seq("CASCADE"): 6176 action = "CASCADE" 6177 elif self._match_text_seq("RESTRICT"): 6178 action = "RESTRICT" 6179 elif self._match_pair(TokenType.SET, TokenType.NULL): 6180 action = "SET NULL" 6181 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6182 action = "SET DEFAULT" 6183 else: 6184 self.raise_error("Invalid key constraint") 6185 6186 options.append(f"ON {on} {action}") 6187 else: 6188 var = self._parse_var_from_options( 6189 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6190 ) 6191 if not var: 6192 break 6193 options.append(var.name) 6194 6195 return options 6196 6197 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6198 if match and not self._match(TokenType.REFERENCES): 6199 return None 6200 6201 expressions = None 6202 this = self._parse_table(schema=True) 6203 options = self._parse_key_constraint_options() 6204 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6205 6206 def _parse_foreign_key(self) -> exp.ForeignKey: 6207 expressions = ( 6208 self._parse_wrapped_id_vars() 6209 if not self._match(TokenType.REFERENCES, advance=False) 6210 else None 6211 ) 6212 reference = self._parse_references() 6213 on_options = {} 6214 6215 while self._match(TokenType.ON): 6216 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6217 self.raise_error("Expected DELETE or UPDATE") 6218 6219 kind = self._prev.text.lower() 6220 6221 if self._match_text_seq("NO", "ACTION"): 6222 action = "NO ACTION" 6223 elif self._match(TokenType.SET): 6224 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6225 action = "SET " + self._prev.text.upper() 6226 else: 6227 self._advance() 6228 action = self._prev.text.upper() 6229 6230 on_options[kind] = action 6231 6232 return self.expression( 6233 exp.ForeignKey, 6234 expressions=expressions, 6235 reference=reference, 6236 options=self._parse_key_constraint_options(), 6237 **on_options, # type: ignore 6238 ) 6239 6240 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6241 return self._parse_ordered() or self._parse_field() 6242 6243 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6244 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6245 self._retreat(self._index - 1) 6246 return None 6247 6248 id_vars = self._parse_wrapped_id_vars() 6249 return self.expression( 6250 exp.PeriodForSystemTimeConstraint, 6251 this=seq_get(id_vars, 0), 6252 expression=seq_get(id_vars, 1), 6253 ) 6254 6255 def _parse_primary_key( 6256 self, wrapped_optional: bool = False, in_props: bool = False 6257 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6258 desc = ( 6259 self._match_set((TokenType.ASC, TokenType.DESC)) 6260 and self._prev.token_type == TokenType.DESC 6261 ) 6262 6263 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6264 return self.expression( 6265 exp.PrimaryKeyColumnConstraint, 6266 desc=desc, 6267 options=self._parse_key_constraint_options(), 6268 ) 6269 6270 expressions = self._parse_wrapped_csv( 6271 self._parse_primary_key_part, optional=wrapped_optional 6272 ) 6273 options = self._parse_key_constraint_options() 6274 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 6275 6276 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6277 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6278 6279 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6280 """ 6281 Parses a datetime column in ODBC format. We parse the column into the corresponding 6282 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6283 same as we did for `DATE('yyyy-mm-dd')`. 6284 6285 Reference: 6286 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6287 """ 6288 self._match(TokenType.VAR) 6289 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6290 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6291 if not self._match(TokenType.R_BRACE): 6292 self.raise_error("Expected }") 6293 return expression 6294 6295 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6296 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6297 return this 6298 6299 bracket_kind = self._prev.token_type 6300 if ( 6301 bracket_kind == TokenType.L_BRACE 6302 and self._curr 6303 and self._curr.token_type == TokenType.VAR 6304 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6305 ): 6306 return self._parse_odbc_datetime_literal() 6307 6308 expressions = self._parse_csv( 6309 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6310 ) 6311 6312 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6313 self.raise_error("Expected ]") 6314 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6315 self.raise_error("Expected }") 6316 6317 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6318 if bracket_kind == TokenType.L_BRACE: 6319 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 6320 elif not this: 6321 this = build_array_constructor( 6322 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6323 ) 6324 else: 6325 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6326 if constructor_type: 6327 return build_array_constructor( 6328 constructor_type, 6329 args=expressions, 6330 bracket_kind=bracket_kind, 6331 dialect=self.dialect, 6332 ) 6333 6334 expressions = apply_index_offset( 6335 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6336 ) 6337 this = self.expression( 6338 exp.Bracket, 6339 this=this, 6340 expressions=expressions, 6341 comments=this.pop_comments(), 6342 ) 6343 6344 self._add_comments(this) 6345 return self._parse_bracket(this) 6346 6347 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6348 if self._match(TokenType.COLON): 6349 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6350 return this 6351 6352 def _parse_case(self) -> t.Optional[exp.Expression]: 6353 ifs = [] 6354 default = None 6355 6356 comments = self._prev_comments 6357 expression = self._parse_assignment() 6358 6359 while self._match(TokenType.WHEN): 6360 this = self._parse_assignment() 6361 self._match(TokenType.THEN) 6362 then = self._parse_assignment() 6363 ifs.append(self.expression(exp.If, this=this, true=then)) 6364 6365 if self._match(TokenType.ELSE): 6366 default = self._parse_assignment() 6367 6368 if not self._match(TokenType.END): 6369 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6370 default = exp.column("interval") 6371 else: 6372 self.raise_error("Expected END after CASE", self._prev) 6373 6374 return self.expression( 6375 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6376 ) 6377 6378 def _parse_if(self) -> t.Optional[exp.Expression]: 6379 if self._match(TokenType.L_PAREN): 6380 args = self._parse_csv( 6381 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6382 ) 6383 this = self.validate_expression(exp.If.from_arg_list(args), args) 6384 self._match_r_paren() 6385 else: 6386 index = self._index - 1 6387 6388 if self.NO_PAREN_IF_COMMANDS and index == 0: 6389 return self._parse_as_command(self._prev) 6390 6391 condition = self._parse_assignment() 6392 6393 if not condition: 6394 self._retreat(index) 6395 return None 6396 6397 self._match(TokenType.THEN) 6398 true = self._parse_assignment() 6399 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6400 self._match(TokenType.END) 6401 this = self.expression(exp.If, this=condition, true=true, false=false) 6402 6403 return this 6404 6405 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6406 if not self._match_text_seq("VALUE", "FOR"): 6407 self._retreat(self._index - 1) 6408 return None 6409 6410 return self.expression( 6411 exp.NextValueFor, 6412 this=self._parse_column(), 6413 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6414 ) 6415 6416 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6417 this = self._parse_function() or self._parse_var_or_string(upper=True) 6418 6419 if self._match(TokenType.FROM): 6420 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6421 6422 if not self._match(TokenType.COMMA): 6423 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6424 6425 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6426 6427 def _parse_gap_fill(self) -> exp.GapFill: 6428 self._match(TokenType.TABLE) 6429 this = self._parse_table() 6430 6431 self._match(TokenType.COMMA) 6432 args = [this, *self._parse_csv(self._parse_lambda)] 6433 6434 gap_fill = exp.GapFill.from_arg_list(args) 6435 return self.validate_expression(gap_fill, args) 6436 6437 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6438 this = self._parse_assignment() 6439 6440 if not self._match(TokenType.ALIAS): 6441 if self._match(TokenType.COMMA): 6442 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6443 6444 self.raise_error("Expected AS after CAST") 6445 6446 fmt = None 6447 to = self._parse_types() 6448 6449 default = self._match(TokenType.DEFAULT) 6450 if default: 6451 default = self._parse_bitwise() 6452 self._match_text_seq("ON", "CONVERSION", "ERROR") 6453 6454 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6455 fmt_string = self._parse_string() 6456 fmt = self._parse_at_time_zone(fmt_string) 6457 6458 if not to: 6459 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6460 if to.this in exp.DataType.TEMPORAL_TYPES: 6461 this = self.expression( 6462 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6463 this=this, 6464 format=exp.Literal.string( 6465 format_time( 6466 fmt_string.this if fmt_string else "", 6467 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6468 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6469 ) 6470 ), 6471 safe=safe, 6472 ) 6473 6474 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6475 this.set("zone", fmt.args["zone"]) 6476 return this 6477 elif not to: 6478 self.raise_error("Expected TYPE after CAST") 6479 elif isinstance(to, exp.Identifier): 6480 to = exp.DataType.build(to.name, udt=True) 6481 elif to.this == exp.DataType.Type.CHAR: 6482 if self._match(TokenType.CHARACTER_SET): 6483 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6484 6485 return self.expression( 6486 exp.Cast if strict else exp.TryCast, 6487 this=this, 6488 to=to, 6489 format=fmt, 6490 safe=safe, 6491 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6492 default=default, 6493 ) 6494 6495 def _parse_string_agg(self) -> exp.GroupConcat: 6496 if self._match(TokenType.DISTINCT): 6497 args: t.List[t.Optional[exp.Expression]] = [ 6498 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6499 ] 6500 if self._match(TokenType.COMMA): 6501 args.extend(self._parse_csv(self._parse_assignment)) 6502 else: 6503 args = self._parse_csv(self._parse_assignment) # type: ignore 6504 6505 if self._match_text_seq("ON", "OVERFLOW"): 6506 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6507 if self._match_text_seq("ERROR"): 6508 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6509 else: 6510 self._match_text_seq("TRUNCATE") 6511 on_overflow = self.expression( 6512 exp.OverflowTruncateBehavior, 6513 this=self._parse_string(), 6514 with_count=( 6515 self._match_text_seq("WITH", "COUNT") 6516 or not self._match_text_seq("WITHOUT", "COUNT") 6517 ), 6518 ) 6519 else: 6520 on_overflow = None 6521 6522 index = self._index 6523 if not self._match(TokenType.R_PAREN) and args: 6524 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6525 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6526 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6527 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6528 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6529 6530 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6531 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6532 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 6533 if not self._match_text_seq("WITHIN", "GROUP"): 6534 self._retreat(index) 6535 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6536 6537 # The corresponding match_r_paren will be called in parse_function (caller) 6538 self._match_l_paren() 6539 6540 return self.expression( 6541 exp.GroupConcat, 6542 this=self._parse_order(this=seq_get(args, 0)), 6543 separator=seq_get(args, 1), 6544 on_overflow=on_overflow, 6545 ) 6546 6547 def _parse_convert( 6548 self, strict: bool, safe: t.Optional[bool] = None 6549 ) -> t.Optional[exp.Expression]: 6550 this = self._parse_bitwise() 6551 6552 if self._match(TokenType.USING): 6553 to: t.Optional[exp.Expression] = self.expression( 6554 exp.CharacterSet, this=self._parse_var() 6555 ) 6556 elif self._match(TokenType.COMMA): 6557 to = self._parse_types() 6558 else: 6559 to = None 6560 6561 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6562 6563 def _parse_xml_table(self) -> exp.XMLTable: 6564 namespaces = None 6565 passing = None 6566 columns = None 6567 6568 if self._match_text_seq("XMLNAMESPACES", "("): 6569 namespaces = self._parse_xml_namespace() 6570 self._match_text_seq(")", ",") 6571 6572 this = self._parse_string() 6573 6574 if self._match_text_seq("PASSING"): 6575 # The BY VALUE keywords are optional and are provided for semantic clarity 6576 self._match_text_seq("BY", "VALUE") 6577 passing = self._parse_csv(self._parse_column) 6578 6579 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6580 6581 if self._match_text_seq("COLUMNS"): 6582 columns = self._parse_csv(self._parse_field_def) 6583 6584 return self.expression( 6585 exp.XMLTable, 6586 this=this, 6587 namespaces=namespaces, 6588 passing=passing, 6589 columns=columns, 6590 by_ref=by_ref, 6591 ) 6592 6593 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6594 namespaces = [] 6595 6596 while True: 6597 if self._match(TokenType.DEFAULT): 6598 uri = self._parse_string() 6599 else: 6600 uri = self._parse_alias(self._parse_string()) 6601 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6602 if not self._match(TokenType.COMMA): 6603 break 6604 6605 return namespaces 6606 6607 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6608 """ 6609 There are generally two variants of the DECODE function: 6610 6611 - DECODE(bin, charset) 6612 - DECODE(expression, search, result [, search, result] ... [, default]) 6613 6614 The second variant will always be parsed into a CASE expression. Note that NULL 6615 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6616 instead of relying on pattern matching. 6617 """ 6618 args = self._parse_csv(self._parse_assignment) 6619 6620 if len(args) < 3: 6621 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6622 6623 expression, *expressions = args 6624 if not expression: 6625 return None 6626 6627 ifs = [] 6628 for search, result in zip(expressions[::2], expressions[1::2]): 6629 if not search or not result: 6630 return None 6631 6632 if isinstance(search, exp.Literal): 6633 ifs.append( 6634 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6635 ) 6636 elif isinstance(search, exp.Null): 6637 ifs.append( 6638 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6639 ) 6640 else: 6641 cond = exp.or_( 6642 exp.EQ(this=expression.copy(), expression=search), 6643 exp.and_( 6644 exp.Is(this=expression.copy(), expression=exp.Null()), 6645 exp.Is(this=search.copy(), expression=exp.Null()), 6646 copy=False, 6647 ), 6648 copy=False, 6649 ) 6650 ifs.append(exp.If(this=cond, true=result)) 6651 6652 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6653 6654 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6655 self._match_text_seq("KEY") 6656 key = self._parse_column() 6657 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6658 self._match_text_seq("VALUE") 6659 value = self._parse_bitwise() 6660 6661 if not key and not value: 6662 return None 6663 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6664 6665 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6666 if not this or not self._match_text_seq("FORMAT", "JSON"): 6667 return this 6668 6669 return self.expression(exp.FormatJson, this=this) 6670 6671 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6672 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6673 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6674 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6675 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6676 else: 6677 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6678 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6679 6680 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6681 6682 if not empty and not error and not null: 6683 return None 6684 6685 return self.expression( 6686 exp.OnCondition, 6687 empty=empty, 6688 error=error, 6689 null=null, 6690 ) 6691 6692 def _parse_on_handling( 6693 self, on: str, *values: str 6694 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6695 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6696 for value in values: 6697 if self._match_text_seq(value, "ON", on): 6698 return f"{value} ON {on}" 6699 6700 index = self._index 6701 if self._match(TokenType.DEFAULT): 6702 default_value = self._parse_bitwise() 6703 if self._match_text_seq("ON", on): 6704 return default_value 6705 6706 self._retreat(index) 6707 6708 return None 6709 6710 @t.overload 6711 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6712 6713 @t.overload 6714 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 6715 6716 def _parse_json_object(self, agg=False): 6717 star = self._parse_star() 6718 expressions = ( 6719 [star] 6720 if star 6721 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6722 ) 6723 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6724 6725 unique_keys = None 6726 if self._match_text_seq("WITH", "UNIQUE"): 6727 unique_keys = True 6728 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6729 unique_keys = False 6730 6731 self._match_text_seq("KEYS") 6732 6733 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6734 self._parse_type() 6735 ) 6736 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6737 6738 return self.expression( 6739 exp.JSONObjectAgg if agg else exp.JSONObject, 6740 expressions=expressions, 6741 null_handling=null_handling, 6742 unique_keys=unique_keys, 6743 return_type=return_type, 6744 encoding=encoding, 6745 ) 6746 6747 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6748 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6749 if not self._match_text_seq("NESTED"): 6750 this = self._parse_id_var() 6751 kind = self._parse_types(allow_identifiers=False) 6752 nested = None 6753 else: 6754 this = None 6755 kind = None 6756 nested = True 6757 6758 path = self._match_text_seq("PATH") and self._parse_string() 6759 nested_schema = nested and self._parse_json_schema() 6760 6761 return self.expression( 6762 exp.JSONColumnDef, 6763 this=this, 6764 kind=kind, 6765 path=path, 6766 nested_schema=nested_schema, 6767 ) 6768 6769 def _parse_json_schema(self) -> exp.JSONSchema: 6770 self._match_text_seq("COLUMNS") 6771 return self.expression( 6772 exp.JSONSchema, 6773 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6774 ) 6775 6776 def _parse_json_table(self) -> exp.JSONTable: 6777 this = self._parse_format_json(self._parse_bitwise()) 6778 path = self._match(TokenType.COMMA) and self._parse_string() 6779 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6780 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6781 schema = self._parse_json_schema() 6782 6783 return exp.JSONTable( 6784 this=this, 6785 schema=schema, 6786 path=path, 6787 error_handling=error_handling, 6788 empty_handling=empty_handling, 6789 ) 6790 6791 def _parse_match_against(self) -> exp.MatchAgainst: 6792 expressions = self._parse_csv(self._parse_column) 6793 6794 self._match_text_seq(")", "AGAINST", "(") 6795 6796 this = self._parse_string() 6797 6798 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6799 modifier = "IN NATURAL LANGUAGE MODE" 6800 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6801 modifier = f"{modifier} WITH QUERY EXPANSION" 6802 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6803 modifier = "IN BOOLEAN MODE" 6804 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6805 modifier = "WITH QUERY EXPANSION" 6806 else: 6807 modifier = None 6808 6809 return self.expression( 6810 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6811 ) 6812 6813 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6814 def _parse_open_json(self) -> exp.OpenJSON: 6815 this = self._parse_bitwise() 6816 path = self._match(TokenType.COMMA) and self._parse_string() 6817 6818 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6819 this = self._parse_field(any_token=True) 6820 kind = self._parse_types() 6821 path = self._parse_string() 6822 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6823 6824 return self.expression( 6825 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6826 ) 6827 6828 expressions = None 6829 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6830 self._match_l_paren() 6831 expressions = self._parse_csv(_parse_open_json_column_def) 6832 6833 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6834 6835 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6836 args = self._parse_csv(self._parse_bitwise) 6837 6838 if self._match(TokenType.IN): 6839 return self.expression( 6840 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6841 ) 6842 6843 if haystack_first: 6844 haystack = seq_get(args, 0) 6845 needle = seq_get(args, 1) 6846 else: 6847 haystack = seq_get(args, 1) 6848 needle = seq_get(args, 0) 6849 6850 return self.expression( 6851 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6852 ) 6853 6854 def _parse_predict(self) -> exp.Predict: 6855 self._match_text_seq("MODEL") 6856 this = self._parse_table() 6857 6858 self._match(TokenType.COMMA) 6859 self._match_text_seq("TABLE") 6860 6861 return self.expression( 6862 exp.Predict, 6863 this=this, 6864 expression=self._parse_table(), 6865 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6866 ) 6867 6868 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6869 args = self._parse_csv(self._parse_table) 6870 return exp.JoinHint(this=func_name.upper(), expressions=args) 6871 6872 def _parse_substring(self) -> exp.Substring: 6873 # Postgres supports the form: substring(string [from int] [for int]) 6874 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6875 6876 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6877 6878 if self._match(TokenType.FROM): 6879 args.append(self._parse_bitwise()) 6880 if self._match(TokenType.FOR): 6881 if len(args) == 1: 6882 args.append(exp.Literal.number(1)) 6883 args.append(self._parse_bitwise()) 6884 6885 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6886 6887 def _parse_trim(self) -> exp.Trim: 6888 # https://www.w3resource.com/sql/character-functions/trim.php 6889 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6890 6891 position = None 6892 collation = None 6893 expression = None 6894 6895 if self._match_texts(self.TRIM_TYPES): 6896 position = self._prev.text.upper() 6897 6898 this = self._parse_bitwise() 6899 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6900 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6901 expression = self._parse_bitwise() 6902 6903 if invert_order: 6904 this, expression = expression, this 6905 6906 if self._match(TokenType.COLLATE): 6907 collation = self._parse_bitwise() 6908 6909 return self.expression( 6910 exp.Trim, this=this, position=position, expression=expression, collation=collation 6911 ) 6912 6913 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6914 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6915 6916 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6917 return self._parse_window(self._parse_id_var(), alias=True) 6918 6919 def _parse_respect_or_ignore_nulls( 6920 self, this: t.Optional[exp.Expression] 6921 ) -> t.Optional[exp.Expression]: 6922 if self._match_text_seq("IGNORE", "NULLS"): 6923 return self.expression(exp.IgnoreNulls, this=this) 6924 if self._match_text_seq("RESPECT", "NULLS"): 6925 return self.expression(exp.RespectNulls, this=this) 6926 return this 6927 6928 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6929 if self._match(TokenType.HAVING): 6930 self._match_texts(("MAX", "MIN")) 6931 max = self._prev.text.upper() != "MIN" 6932 return self.expression( 6933 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6934 ) 6935 6936 return this 6937 6938 def _parse_window( 6939 self, this: t.Optional[exp.Expression], alias: bool = False 6940 ) -> t.Optional[exp.Expression]: 6941 func = this 6942 comments = func.comments if isinstance(func, exp.Expression) else None 6943 6944 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6945 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6946 if self._match_text_seq("WITHIN", "GROUP"): 6947 order = self._parse_wrapped(self._parse_order) 6948 this = self.expression(exp.WithinGroup, this=this, expression=order) 6949 6950 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6951 self._match(TokenType.WHERE) 6952 this = self.expression( 6953 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6954 ) 6955 self._match_r_paren() 6956 6957 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6958 # Some dialects choose to implement and some do not. 6959 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6960 6961 # There is some code above in _parse_lambda that handles 6962 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6963 6964 # The below changes handle 6965 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6966 6967 # Oracle allows both formats 6968 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6969 # and Snowflake chose to do the same for familiarity 6970 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6971 if isinstance(this, exp.AggFunc): 6972 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6973 6974 if ignore_respect and ignore_respect is not this: 6975 ignore_respect.replace(ignore_respect.this) 6976 this = self.expression(ignore_respect.__class__, this=this) 6977 6978 this = self._parse_respect_or_ignore_nulls(this) 6979 6980 # bigquery select from window x AS (partition by ...) 6981 if alias: 6982 over = None 6983 self._match(TokenType.ALIAS) 6984 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6985 return this 6986 else: 6987 over = self._prev.text.upper() 6988 6989 if comments and isinstance(func, exp.Expression): 6990 func.pop_comments() 6991 6992 if not self._match(TokenType.L_PAREN): 6993 return self.expression( 6994 exp.Window, 6995 comments=comments, 6996 this=this, 6997 alias=self._parse_id_var(False), 6998 over=over, 6999 ) 7000 7001 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 7002 7003 first = self._match(TokenType.FIRST) 7004 if self._match_text_seq("LAST"): 7005 first = False 7006 7007 partition, order = self._parse_partition_and_order() 7008 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 7009 7010 if kind: 7011 self._match(TokenType.BETWEEN) 7012 start = self._parse_window_spec() 7013 self._match(TokenType.AND) 7014 end = self._parse_window_spec() 7015 exclude = ( 7016 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 7017 if self._match_text_seq("EXCLUDE") 7018 else None 7019 ) 7020 7021 spec = self.expression( 7022 exp.WindowSpec, 7023 kind=kind, 7024 start=start["value"], 7025 start_side=start["side"], 7026 end=end["value"], 7027 end_side=end["side"], 7028 exclude=exclude, 7029 ) 7030 else: 7031 spec = None 7032 7033 self._match_r_paren() 7034 7035 window = self.expression( 7036 exp.Window, 7037 comments=comments, 7038 this=this, 7039 partition_by=partition, 7040 order=order, 7041 spec=spec, 7042 alias=window_alias, 7043 over=over, 7044 first=first, 7045 ) 7046 7047 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 7048 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 7049 return self._parse_window(window, alias=alias) 7050 7051 return window 7052 7053 def _parse_partition_and_order( 7054 self, 7055 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 7056 return self._parse_partition_by(), self._parse_order() 7057 7058 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 7059 self._match(TokenType.BETWEEN) 7060 7061 return { 7062 "value": ( 7063 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 7064 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 7065 or self._parse_bitwise() 7066 ), 7067 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 7068 } 7069 7070 def _parse_alias( 7071 self, this: t.Optional[exp.Expression], explicit: bool = False 7072 ) -> t.Optional[exp.Expression]: 7073 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 7074 # so this section tries to parse the clause version and if it fails, it treats the token 7075 # as an identifier (alias) 7076 if self._can_parse_limit_or_offset(): 7077 return this 7078 7079 any_token = self._match(TokenType.ALIAS) 7080 comments = self._prev_comments or [] 7081 7082 if explicit and not any_token: 7083 return this 7084 7085 if self._match(TokenType.L_PAREN): 7086 aliases = self.expression( 7087 exp.Aliases, 7088 comments=comments, 7089 this=this, 7090 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 7091 ) 7092 self._match_r_paren(aliases) 7093 return aliases 7094 7095 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 7096 self.STRING_ALIASES and self._parse_string_as_identifier() 7097 ) 7098 7099 if alias: 7100 comments.extend(alias.pop_comments()) 7101 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 7102 column = this.this 7103 7104 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7105 if not this.comments and column and column.comments: 7106 this.comments = column.pop_comments() 7107 7108 return this 7109 7110 def _parse_id_var( 7111 self, 7112 any_token: bool = True, 7113 tokens: t.Optional[t.Collection[TokenType]] = None, 7114 ) -> t.Optional[exp.Expression]: 7115 expression = self._parse_identifier() 7116 if not expression and ( 7117 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7118 ): 7119 quoted = self._prev.token_type == TokenType.STRING 7120 expression = self._identifier_expression(quoted=quoted) 7121 7122 return expression 7123 7124 def _parse_string(self) -> t.Optional[exp.Expression]: 7125 if self._match_set(self.STRING_PARSERS): 7126 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7127 return self._parse_placeholder() 7128 7129 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7130 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7131 if output: 7132 output.update_positions(self._prev) 7133 return output 7134 7135 def _parse_number(self) -> t.Optional[exp.Expression]: 7136 if self._match_set(self.NUMERIC_PARSERS): 7137 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7138 return self._parse_placeholder() 7139 7140 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7141 if self._match(TokenType.IDENTIFIER): 7142 return self._identifier_expression(quoted=True) 7143 return self._parse_placeholder() 7144 7145 def _parse_var( 7146 self, 7147 any_token: bool = False, 7148 tokens: t.Optional[t.Collection[TokenType]] = None, 7149 upper: bool = False, 7150 ) -> t.Optional[exp.Expression]: 7151 if ( 7152 (any_token and self._advance_any()) 7153 or self._match(TokenType.VAR) 7154 or (self._match_set(tokens) if tokens else False) 7155 ): 7156 return self.expression( 7157 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7158 ) 7159 return self._parse_placeholder() 7160 7161 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7162 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7163 self._advance() 7164 return self._prev 7165 return None 7166 7167 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7168 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7169 7170 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7171 return self._parse_primary() or self._parse_var(any_token=True) 7172 7173 def _parse_null(self) -> t.Optional[exp.Expression]: 7174 if self._match_set(self.NULL_TOKENS): 7175 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7176 return self._parse_placeholder() 7177 7178 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7179 if self._match(TokenType.TRUE): 7180 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7181 if self._match(TokenType.FALSE): 7182 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7183 return self._parse_placeholder() 7184 7185 def _parse_star(self) -> t.Optional[exp.Expression]: 7186 if self._match(TokenType.STAR): 7187 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7188 return self._parse_placeholder() 7189 7190 def _parse_parameter(self) -> exp.Parameter: 7191 this = self._parse_identifier() or self._parse_primary_or_var() 7192 return self.expression(exp.Parameter, this=this) 7193 7194 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7195 if self._match_set(self.PLACEHOLDER_PARSERS): 7196 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7197 if placeholder: 7198 return placeholder 7199 self._advance(-1) 7200 return None 7201 7202 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7203 if not self._match_texts(keywords): 7204 return None 7205 if self._match(TokenType.L_PAREN, advance=False): 7206 return self._parse_wrapped_csv(self._parse_expression) 7207 7208 expression = self._parse_expression() 7209 return [expression] if expression else None 7210 7211 def _parse_csv( 7212 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7213 ) -> t.List[exp.Expression]: 7214 parse_result = parse_method() 7215 items = [parse_result] if parse_result is not None else [] 7216 7217 while self._match(sep): 7218 self._add_comments(parse_result) 7219 parse_result = parse_method() 7220 if parse_result is not None: 7221 items.append(parse_result) 7222 7223 return items 7224 7225 def _parse_tokens( 7226 self, parse_method: t.Callable, expressions: t.Dict 7227 ) -> t.Optional[exp.Expression]: 7228 this = parse_method() 7229 7230 while self._match_set(expressions): 7231 this = self.expression( 7232 expressions[self._prev.token_type], 7233 this=this, 7234 comments=self._prev_comments, 7235 expression=parse_method(), 7236 ) 7237 7238 return this 7239 7240 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 7241 while self._match(TokenType.PIPE_GT): 7242 start = self._curr 7243 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 7244 if not parser: 7245 set_op_query = self._parse_pipe_syntax_set_operator(query) 7246 if not set_op_query: 7247 self._retreat(start) 7248 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 7249 break 7250 7251 query = set_op_query 7252 else: 7253 query = parser(self, query) 7254 7255 return query 7256 7257 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7258 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7259 7260 def _parse_wrapped_csv( 7261 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7262 ) -> t.List[exp.Expression]: 7263 return self._parse_wrapped( 7264 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7265 ) 7266 7267 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7268 wrapped = self._match(TokenType.L_PAREN) 7269 if not wrapped and not optional: 7270 self.raise_error("Expecting (") 7271 parse_result = parse_method() 7272 if wrapped: 7273 self._match_r_paren() 7274 return parse_result 7275 7276 def _parse_expressions(self) -> t.List[exp.Expression]: 7277 return self._parse_csv(self._parse_expression) 7278 7279 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7280 return self._parse_select() or self._parse_set_operations( 7281 self._parse_alias(self._parse_assignment(), explicit=True) 7282 if alias 7283 else self._parse_assignment() 7284 ) 7285 7286 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7287 return self._parse_query_modifiers( 7288 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7289 ) 7290 7291 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7292 this = None 7293 if self._match_texts(self.TRANSACTION_KIND): 7294 this = self._prev.text 7295 7296 self._match_texts(("TRANSACTION", "WORK")) 7297 7298 modes = [] 7299 while True: 7300 mode = [] 7301 while self._match(TokenType.VAR): 7302 mode.append(self._prev.text) 7303 7304 if mode: 7305 modes.append(" ".join(mode)) 7306 if not self._match(TokenType.COMMA): 7307 break 7308 7309 return self.expression(exp.Transaction, this=this, modes=modes) 7310 7311 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7312 chain = None 7313 savepoint = None 7314 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7315 7316 self._match_texts(("TRANSACTION", "WORK")) 7317 7318 if self._match_text_seq("TO"): 7319 self._match_text_seq("SAVEPOINT") 7320 savepoint = self._parse_id_var() 7321 7322 if self._match(TokenType.AND): 7323 chain = not self._match_text_seq("NO") 7324 self._match_text_seq("CHAIN") 7325 7326 if is_rollback: 7327 return self.expression(exp.Rollback, savepoint=savepoint) 7328 7329 return self.expression(exp.Commit, chain=chain) 7330 7331 def _parse_refresh(self) -> exp.Refresh: 7332 self._match(TokenType.TABLE) 7333 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7334 7335 def _parse_add_column(self) -> t.Optional[exp.Expression]: 7336 if not self._prev.text.upper() == "ADD": 7337 return None 7338 7339 self._match(TokenType.COLUMN) 7340 exists_column = self._parse_exists(not_=True) 7341 expression = self._parse_field_def() 7342 7343 if expression: 7344 expression.set("exists", exists_column) 7345 7346 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7347 if self._match_texts(("FIRST", "AFTER")): 7348 position = self._prev.text 7349 column_position = self.expression( 7350 exp.ColumnPosition, this=self._parse_column(), position=position 7351 ) 7352 expression.set("position", column_position) 7353 7354 return expression 7355 7356 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7357 drop = self._match(TokenType.DROP) and self._parse_drop() 7358 if drop and not isinstance(drop, exp.Command): 7359 drop.set("kind", drop.args.get("kind", "COLUMN")) 7360 return drop 7361 7362 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7363 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7364 return self.expression( 7365 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7366 ) 7367 7368 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7369 def _parse_add_column_or_constraint(): 7370 self._match_text_seq("ADD") 7371 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7372 return self.expression( 7373 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7374 ) 7375 return self._parse_add_column() 7376 7377 if not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN or self._match_text_seq( 7378 "COLUMNS" 7379 ): 7380 schema = self._parse_schema() 7381 7382 return ensure_list(schema) if schema else self._parse_csv(self._parse_field_def) 7383 7384 return self._parse_csv(_parse_add_column_or_constraint) 7385 7386 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7387 if self._match_texts(self.ALTER_ALTER_PARSERS): 7388 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7389 7390 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7391 # keyword after ALTER we default to parsing this statement 7392 self._match(TokenType.COLUMN) 7393 column = self._parse_field(any_token=True) 7394 7395 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7396 return self.expression(exp.AlterColumn, this=column, drop=True) 7397 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7398 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7399 if self._match(TokenType.COMMENT): 7400 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7401 if self._match_text_seq("DROP", "NOT", "NULL"): 7402 return self.expression( 7403 exp.AlterColumn, 7404 this=column, 7405 drop=True, 7406 allow_null=True, 7407 ) 7408 if self._match_text_seq("SET", "NOT", "NULL"): 7409 return self.expression( 7410 exp.AlterColumn, 7411 this=column, 7412 allow_null=False, 7413 ) 7414 7415 if self._match_text_seq("SET", "VISIBLE"): 7416 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7417 if self._match_text_seq("SET", "INVISIBLE"): 7418 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7419 7420 self._match_text_seq("SET", "DATA") 7421 self._match_text_seq("TYPE") 7422 return self.expression( 7423 exp.AlterColumn, 7424 this=column, 7425 dtype=self._parse_types(), 7426 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7427 using=self._match(TokenType.USING) and self._parse_assignment(), 7428 ) 7429 7430 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7431 if self._match_texts(("ALL", "EVEN", "AUTO")): 7432 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7433 7434 self._match_text_seq("KEY", "DISTKEY") 7435 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7436 7437 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7438 if compound: 7439 self._match_text_seq("SORTKEY") 7440 7441 if self._match(TokenType.L_PAREN, advance=False): 7442 return self.expression( 7443 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7444 ) 7445 7446 self._match_texts(("AUTO", "NONE")) 7447 return self.expression( 7448 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7449 ) 7450 7451 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7452 index = self._index - 1 7453 7454 partition_exists = self._parse_exists() 7455 if self._match(TokenType.PARTITION, advance=False): 7456 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7457 7458 self._retreat(index) 7459 return self._parse_csv(self._parse_drop_column) 7460 7461 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7462 if self._match(TokenType.COLUMN): 7463 exists = self._parse_exists() 7464 old_column = self._parse_column() 7465 to = self._match_text_seq("TO") 7466 new_column = self._parse_column() 7467 7468 if old_column is None or to is None or new_column is None: 7469 return None 7470 7471 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7472 7473 self._match_text_seq("TO") 7474 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7475 7476 def _parse_alter_table_set(self) -> exp.AlterSet: 7477 alter_set = self.expression(exp.AlterSet) 7478 7479 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7480 "TABLE", "PROPERTIES" 7481 ): 7482 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7483 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7484 alter_set.set("expressions", [self._parse_assignment()]) 7485 elif self._match_texts(("LOGGED", "UNLOGGED")): 7486 alter_set.set("option", exp.var(self._prev.text.upper())) 7487 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7488 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7489 elif self._match_text_seq("LOCATION"): 7490 alter_set.set("location", self._parse_field()) 7491 elif self._match_text_seq("ACCESS", "METHOD"): 7492 alter_set.set("access_method", self._parse_field()) 7493 elif self._match_text_seq("TABLESPACE"): 7494 alter_set.set("tablespace", self._parse_field()) 7495 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7496 alter_set.set("file_format", [self._parse_field()]) 7497 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7498 alter_set.set("file_format", self._parse_wrapped_options()) 7499 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7500 alter_set.set("copy_options", self._parse_wrapped_options()) 7501 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7502 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7503 else: 7504 if self._match_text_seq("SERDE"): 7505 alter_set.set("serde", self._parse_field()) 7506 7507 properties = self._parse_wrapped(self._parse_properties, optional=True) 7508 alter_set.set("expressions", [properties]) 7509 7510 return alter_set 7511 7512 def _parse_alter(self) -> exp.Alter | exp.Command: 7513 start = self._prev 7514 7515 alter_token = self._match_set(self.ALTERABLES) and self._prev 7516 if not alter_token: 7517 return self._parse_as_command(start) 7518 7519 exists = self._parse_exists() 7520 only = self._match_text_seq("ONLY") 7521 this = self._parse_table(schema=True) 7522 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7523 7524 if self._next: 7525 self._advance() 7526 7527 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7528 if parser: 7529 actions = ensure_list(parser(self)) 7530 not_valid = self._match_text_seq("NOT", "VALID") 7531 options = self._parse_csv(self._parse_property) 7532 7533 if not self._curr and actions: 7534 return self.expression( 7535 exp.Alter, 7536 this=this, 7537 kind=alter_token.text.upper(), 7538 exists=exists, 7539 actions=actions, 7540 only=only, 7541 options=options, 7542 cluster=cluster, 7543 not_valid=not_valid, 7544 ) 7545 7546 return self._parse_as_command(start) 7547 7548 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7549 start = self._prev 7550 # https://duckdb.org/docs/sql/statements/analyze 7551 if not self._curr: 7552 return self.expression(exp.Analyze) 7553 7554 options = [] 7555 while self._match_texts(self.ANALYZE_STYLES): 7556 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7557 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7558 else: 7559 options.append(self._prev.text.upper()) 7560 7561 this: t.Optional[exp.Expression] = None 7562 inner_expression: t.Optional[exp.Expression] = None 7563 7564 kind = self._curr and self._curr.text.upper() 7565 7566 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7567 this = self._parse_table_parts() 7568 elif self._match_text_seq("TABLES"): 7569 if self._match_set((TokenType.FROM, TokenType.IN)): 7570 kind = f"{kind} {self._prev.text.upper()}" 7571 this = self._parse_table(schema=True, is_db_reference=True) 7572 elif self._match_text_seq("DATABASE"): 7573 this = self._parse_table(schema=True, is_db_reference=True) 7574 elif self._match_text_seq("CLUSTER"): 7575 this = self._parse_table() 7576 # Try matching inner expr keywords before fallback to parse table. 7577 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7578 kind = None 7579 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7580 else: 7581 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7582 kind = None 7583 this = self._parse_table_parts() 7584 7585 partition = self._try_parse(self._parse_partition) 7586 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7587 return self._parse_as_command(start) 7588 7589 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7590 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7591 "WITH", "ASYNC", "MODE" 7592 ): 7593 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7594 else: 7595 mode = None 7596 7597 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7598 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7599 7600 properties = self._parse_properties() 7601 return self.expression( 7602 exp.Analyze, 7603 kind=kind, 7604 this=this, 7605 mode=mode, 7606 partition=partition, 7607 properties=properties, 7608 expression=inner_expression, 7609 options=options, 7610 ) 7611 7612 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7613 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7614 this = None 7615 kind = self._prev.text.upper() 7616 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7617 expressions = [] 7618 7619 if not self._match_text_seq("STATISTICS"): 7620 self.raise_error("Expecting token STATISTICS") 7621 7622 if self._match_text_seq("NOSCAN"): 7623 this = "NOSCAN" 7624 elif self._match(TokenType.FOR): 7625 if self._match_text_seq("ALL", "COLUMNS"): 7626 this = "FOR ALL COLUMNS" 7627 if self._match_texts("COLUMNS"): 7628 this = "FOR COLUMNS" 7629 expressions = self._parse_csv(self._parse_column_reference) 7630 elif self._match_text_seq("SAMPLE"): 7631 sample = self._parse_number() 7632 expressions = [ 7633 self.expression( 7634 exp.AnalyzeSample, 7635 sample=sample, 7636 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7637 ) 7638 ] 7639 7640 return self.expression( 7641 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7642 ) 7643 7644 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7645 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7646 kind = None 7647 this = None 7648 expression: t.Optional[exp.Expression] = None 7649 if self._match_text_seq("REF", "UPDATE"): 7650 kind = "REF" 7651 this = "UPDATE" 7652 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7653 this = "UPDATE SET DANGLING TO NULL" 7654 elif self._match_text_seq("STRUCTURE"): 7655 kind = "STRUCTURE" 7656 if self._match_text_seq("CASCADE", "FAST"): 7657 this = "CASCADE FAST" 7658 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7659 ("ONLINE", "OFFLINE") 7660 ): 7661 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7662 expression = self._parse_into() 7663 7664 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7665 7666 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7667 this = self._prev.text.upper() 7668 if self._match_text_seq("COLUMNS"): 7669 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7670 return None 7671 7672 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7673 kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7674 if self._match_text_seq("STATISTICS"): 7675 return self.expression(exp.AnalyzeDelete, kind=kind) 7676 return None 7677 7678 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7679 if self._match_text_seq("CHAINED", "ROWS"): 7680 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7681 return None 7682 7683 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7684 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7685 this = self._prev.text.upper() 7686 expression: t.Optional[exp.Expression] = None 7687 expressions = [] 7688 update_options = None 7689 7690 if self._match_text_seq("HISTOGRAM", "ON"): 7691 expressions = self._parse_csv(self._parse_column_reference) 7692 with_expressions = [] 7693 while self._match(TokenType.WITH): 7694 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7695 if self._match_texts(("SYNC", "ASYNC")): 7696 if self._match_text_seq("MODE", advance=False): 7697 with_expressions.append(f"{self._prev.text.upper()} MODE") 7698 self._advance() 7699 else: 7700 buckets = self._parse_number() 7701 if self._match_text_seq("BUCKETS"): 7702 with_expressions.append(f"{buckets} BUCKETS") 7703 if with_expressions: 7704 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7705 7706 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7707 TokenType.UPDATE, advance=False 7708 ): 7709 update_options = self._prev.text.upper() 7710 self._advance() 7711 elif self._match_text_seq("USING", "DATA"): 7712 expression = self.expression(exp.UsingData, this=self._parse_string()) 7713 7714 return self.expression( 7715 exp.AnalyzeHistogram, 7716 this=this, 7717 expressions=expressions, 7718 expression=expression, 7719 update_options=update_options, 7720 ) 7721 7722 def _parse_merge(self) -> exp.Merge: 7723 self._match(TokenType.INTO) 7724 target = self._parse_table() 7725 7726 if target and self._match(TokenType.ALIAS, advance=False): 7727 target.set("alias", self._parse_table_alias()) 7728 7729 self._match(TokenType.USING) 7730 using = self._parse_table() 7731 7732 self._match(TokenType.ON) 7733 on = self._parse_assignment() 7734 7735 return self.expression( 7736 exp.Merge, 7737 this=target, 7738 using=using, 7739 on=on, 7740 whens=self._parse_when_matched(), 7741 returning=self._parse_returning(), 7742 ) 7743 7744 def _parse_when_matched(self) -> exp.Whens: 7745 whens = [] 7746 7747 while self._match(TokenType.WHEN): 7748 matched = not self._match(TokenType.NOT) 7749 self._match_text_seq("MATCHED") 7750 source = ( 7751 False 7752 if self._match_text_seq("BY", "TARGET") 7753 else self._match_text_seq("BY", "SOURCE") 7754 ) 7755 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7756 7757 self._match(TokenType.THEN) 7758 7759 if self._match(TokenType.INSERT): 7760 this = self._parse_star() 7761 if this: 7762 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7763 else: 7764 then = self.expression( 7765 exp.Insert, 7766 this=exp.var("ROW") 7767 if self._match_text_seq("ROW") 7768 else self._parse_value(values=False), 7769 expression=self._match_text_seq("VALUES") and self._parse_value(), 7770 ) 7771 elif self._match(TokenType.UPDATE): 7772 expressions = self._parse_star() 7773 if expressions: 7774 then = self.expression(exp.Update, expressions=expressions) 7775 else: 7776 then = self.expression( 7777 exp.Update, 7778 expressions=self._match(TokenType.SET) 7779 and self._parse_csv(self._parse_equality), 7780 ) 7781 elif self._match(TokenType.DELETE): 7782 then = self.expression(exp.Var, this=self._prev.text) 7783 else: 7784 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7785 7786 whens.append( 7787 self.expression( 7788 exp.When, 7789 matched=matched, 7790 source=source, 7791 condition=condition, 7792 then=then, 7793 ) 7794 ) 7795 return self.expression(exp.Whens, expressions=whens) 7796 7797 def _parse_show(self) -> t.Optional[exp.Expression]: 7798 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7799 if parser: 7800 return parser(self) 7801 return self._parse_as_command(self._prev) 7802 7803 def _parse_set_item_assignment( 7804 self, kind: t.Optional[str] = None 7805 ) -> t.Optional[exp.Expression]: 7806 index = self._index 7807 7808 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7809 return self._parse_set_transaction(global_=kind == "GLOBAL") 7810 7811 left = self._parse_primary() or self._parse_column() 7812 assignment_delimiter = self._match_texts(("=", "TO")) 7813 7814 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7815 self._retreat(index) 7816 return None 7817 7818 right = self._parse_statement() or self._parse_id_var() 7819 if isinstance(right, (exp.Column, exp.Identifier)): 7820 right = exp.var(right.name) 7821 7822 this = self.expression(exp.EQ, this=left, expression=right) 7823 return self.expression(exp.SetItem, this=this, kind=kind) 7824 7825 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7826 self._match_text_seq("TRANSACTION") 7827 characteristics = self._parse_csv( 7828 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7829 ) 7830 return self.expression( 7831 exp.SetItem, 7832 expressions=characteristics, 7833 kind="TRANSACTION", 7834 **{"global": global_}, # type: ignore 7835 ) 7836 7837 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7838 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7839 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7840 7841 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7842 index = self._index 7843 set_ = self.expression( 7844 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7845 ) 7846 7847 if self._curr: 7848 self._retreat(index) 7849 return self._parse_as_command(self._prev) 7850 7851 return set_ 7852 7853 def _parse_var_from_options( 7854 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7855 ) -> t.Optional[exp.Var]: 7856 start = self._curr 7857 if not start: 7858 return None 7859 7860 option = start.text.upper() 7861 continuations = options.get(option) 7862 7863 index = self._index 7864 self._advance() 7865 for keywords in continuations or []: 7866 if isinstance(keywords, str): 7867 keywords = (keywords,) 7868 7869 if self._match_text_seq(*keywords): 7870 option = f"{option} {' '.join(keywords)}" 7871 break 7872 else: 7873 if continuations or continuations is None: 7874 if raise_unmatched: 7875 self.raise_error(f"Unknown option {option}") 7876 7877 self._retreat(index) 7878 return None 7879 7880 return exp.var(option) 7881 7882 def _parse_as_command(self, start: Token) -> exp.Command: 7883 while self._curr: 7884 self._advance() 7885 text = self._find_sql(start, self._prev) 7886 size = len(start.text) 7887 self._warn_unsupported() 7888 return exp.Command(this=text[:size], expression=text[size:]) 7889 7890 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7891 settings = [] 7892 7893 self._match_l_paren() 7894 kind = self._parse_id_var() 7895 7896 if self._match(TokenType.L_PAREN): 7897 while True: 7898 key = self._parse_id_var() 7899 value = self._parse_primary() 7900 if not key and value is None: 7901 break 7902 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7903 self._match(TokenType.R_PAREN) 7904 7905 self._match_r_paren() 7906 7907 return self.expression( 7908 exp.DictProperty, 7909 this=this, 7910 kind=kind.this if kind else None, 7911 settings=settings, 7912 ) 7913 7914 def _parse_dict_range(self, this: str) -> exp.DictRange: 7915 self._match_l_paren() 7916 has_min = self._match_text_seq("MIN") 7917 if has_min: 7918 min = self._parse_var() or self._parse_primary() 7919 self._match_text_seq("MAX") 7920 max = self._parse_var() or self._parse_primary() 7921 else: 7922 max = self._parse_var() or self._parse_primary() 7923 min = exp.Literal.number(0) 7924 self._match_r_paren() 7925 return self.expression(exp.DictRange, this=this, min=min, max=max) 7926 7927 def _parse_comprehension( 7928 self, this: t.Optional[exp.Expression] 7929 ) -> t.Optional[exp.Comprehension]: 7930 index = self._index 7931 expression = self._parse_column() 7932 if not self._match(TokenType.IN): 7933 self._retreat(index - 1) 7934 return None 7935 iterator = self._parse_column() 7936 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7937 return self.expression( 7938 exp.Comprehension, 7939 this=this, 7940 expression=expression, 7941 iterator=iterator, 7942 condition=condition, 7943 ) 7944 7945 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7946 if self._match(TokenType.HEREDOC_STRING): 7947 return self.expression(exp.Heredoc, this=self._prev.text) 7948 7949 if not self._match_text_seq("$"): 7950 return None 7951 7952 tags = ["$"] 7953 tag_text = None 7954 7955 if self._is_connected(): 7956 self._advance() 7957 tags.append(self._prev.text.upper()) 7958 else: 7959 self.raise_error("No closing $ found") 7960 7961 if tags[-1] != "$": 7962 if self._is_connected() and self._match_text_seq("$"): 7963 tag_text = tags[-1] 7964 tags.append("$") 7965 else: 7966 self.raise_error("No closing $ found") 7967 7968 heredoc_start = self._curr 7969 7970 while self._curr: 7971 if self._match_text_seq(*tags, advance=False): 7972 this = self._find_sql(heredoc_start, self._prev) 7973 self._advance(len(tags)) 7974 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7975 7976 self._advance() 7977 7978 self.raise_error(f"No closing {''.join(tags)} found") 7979 return None 7980 7981 def _find_parser( 7982 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7983 ) -> t.Optional[t.Callable]: 7984 if not self._curr: 7985 return None 7986 7987 index = self._index 7988 this = [] 7989 while True: 7990 # The current token might be multiple words 7991 curr = self._curr.text.upper() 7992 key = curr.split(" ") 7993 this.append(curr) 7994 7995 self._advance() 7996 result, trie = in_trie(trie, key) 7997 if result == TrieResult.FAILED: 7998 break 7999 8000 if result == TrieResult.EXISTS: 8001 subparser = parsers[" ".join(this)] 8002 return subparser 8003 8004 self._retreat(index) 8005 return None 8006 8007 def _match(self, token_type, advance=True, expression=None): 8008 if not self._curr: 8009 return None 8010 8011 if self._curr.token_type == token_type: 8012 if advance: 8013 self._advance() 8014 self._add_comments(expression) 8015 return True 8016 8017 return None 8018 8019 def _match_set(self, types, advance=True): 8020 if not self._curr: 8021 return None 8022 8023 if self._curr.token_type in types: 8024 if advance: 8025 self._advance() 8026 return True 8027 8028 return None 8029 8030 def _match_pair(self, token_type_a, token_type_b, advance=True): 8031 if not self._curr or not self._next: 8032 return None 8033 8034 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 8035 if advance: 8036 self._advance(2) 8037 return True 8038 8039 return None 8040 8041 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8042 if not self._match(TokenType.L_PAREN, expression=expression): 8043 self.raise_error("Expecting (") 8044 8045 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8046 if not self._match(TokenType.R_PAREN, expression=expression): 8047 self.raise_error("Expecting )") 8048 8049 def _match_texts(self, texts, advance=True): 8050 if ( 8051 self._curr 8052 and self._curr.token_type != TokenType.STRING 8053 and self._curr.text.upper() in texts 8054 ): 8055 if advance: 8056 self._advance() 8057 return True 8058 return None 8059 8060 def _match_text_seq(self, *texts, advance=True): 8061 index = self._index 8062 for text in texts: 8063 if ( 8064 self._curr 8065 and self._curr.token_type != TokenType.STRING 8066 and self._curr.text.upper() == text 8067 ): 8068 self._advance() 8069 else: 8070 self._retreat(index) 8071 return None 8072 8073 if not advance: 8074 self._retreat(index) 8075 8076 return True 8077 8078 def _replace_lambda( 8079 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8080 ) -> t.Optional[exp.Expression]: 8081 if not node: 8082 return node 8083 8084 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8085 8086 for column in node.find_all(exp.Column): 8087 typ = lambda_types.get(column.parts[0].name) 8088 if typ is not None: 8089 dot_or_id = column.to_dot() if column.table else column.this 8090 8091 if typ: 8092 dot_or_id = self.expression( 8093 exp.Cast, 8094 this=dot_or_id, 8095 to=typ, 8096 ) 8097 8098 parent = column.parent 8099 8100 while isinstance(parent, exp.Dot): 8101 if not isinstance(parent.parent, exp.Dot): 8102 parent.replace(dot_or_id) 8103 break 8104 parent = parent.parent 8105 else: 8106 if column is node: 8107 node = dot_or_id 8108 else: 8109 column.replace(dot_or_id) 8110 return node 8111 8112 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8113 start = self._prev 8114 8115 # Not to be confused with TRUNCATE(number, decimals) function call 8116 if self._match(TokenType.L_PAREN): 8117 self._retreat(self._index - 2) 8118 return self._parse_function() 8119 8120 # Clickhouse supports TRUNCATE DATABASE as well 8121 is_database = self._match(TokenType.DATABASE) 8122 8123 self._match(TokenType.TABLE) 8124 8125 exists = self._parse_exists(not_=False) 8126 8127 expressions = self._parse_csv( 8128 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8129 ) 8130 8131 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8132 8133 if self._match_text_seq("RESTART", "IDENTITY"): 8134 identity = "RESTART" 8135 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8136 identity = "CONTINUE" 8137 else: 8138 identity = None 8139 8140 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8141 option = self._prev.text 8142 else: 8143 option = None 8144 8145 partition = self._parse_partition() 8146 8147 # Fallback case 8148 if self._curr: 8149 return self._parse_as_command(start) 8150 8151 return self.expression( 8152 exp.TruncateTable, 8153 expressions=expressions, 8154 is_database=is_database, 8155 exists=exists, 8156 cluster=cluster, 8157 identity=identity, 8158 option=option, 8159 partition=partition, 8160 ) 8161 8162 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8163 this = self._parse_ordered(self._parse_opclass) 8164 8165 if not self._match(TokenType.WITH): 8166 return this 8167 8168 op = self._parse_var(any_token=True) 8169 8170 return self.expression(exp.WithOperator, this=this, op=op) 8171 8172 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8173 self._match(TokenType.EQ) 8174 self._match(TokenType.L_PAREN) 8175 8176 opts: t.List[t.Optional[exp.Expression]] = [] 8177 option: exp.Expression | None 8178 while self._curr and not self._match(TokenType.R_PAREN): 8179 if self._match_text_seq("FORMAT_NAME", "="): 8180 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8181 option = self._parse_format_name() 8182 else: 8183 option = self._parse_property() 8184 8185 if option is None: 8186 self.raise_error("Unable to parse option") 8187 break 8188 8189 opts.append(option) 8190 8191 return opts 8192 8193 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8194 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8195 8196 options = [] 8197 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8198 option = self._parse_var(any_token=True) 8199 prev = self._prev.text.upper() 8200 8201 # Different dialects might separate options and values by white space, "=" and "AS" 8202 self._match(TokenType.EQ) 8203 self._match(TokenType.ALIAS) 8204 8205 param = self.expression(exp.CopyParameter, this=option) 8206 8207 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8208 TokenType.L_PAREN, advance=False 8209 ): 8210 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8211 param.set("expressions", self._parse_wrapped_options()) 8212 elif prev == "FILE_FORMAT": 8213 # T-SQL's external file format case 8214 param.set("expression", self._parse_field()) 8215 else: 8216 param.set("expression", self._parse_unquoted_field()) 8217 8218 options.append(param) 8219 self._match(sep) 8220 8221 return options 8222 8223 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8224 expr = self.expression(exp.Credentials) 8225 8226 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8227 expr.set("storage", self._parse_field()) 8228 if self._match_text_seq("CREDENTIALS"): 8229 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8230 creds = ( 8231 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8232 ) 8233 expr.set("credentials", creds) 8234 if self._match_text_seq("ENCRYPTION"): 8235 expr.set("encryption", self._parse_wrapped_options()) 8236 if self._match_text_seq("IAM_ROLE"): 8237 expr.set("iam_role", self._parse_field()) 8238 if self._match_text_seq("REGION"): 8239 expr.set("region", self._parse_field()) 8240 8241 return expr 8242 8243 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8244 return self._parse_field() 8245 8246 def _parse_copy(self) -> exp.Copy | exp.Command: 8247 start = self._prev 8248 8249 self._match(TokenType.INTO) 8250 8251 this = ( 8252 self._parse_select(nested=True, parse_subquery_alias=False) 8253 if self._match(TokenType.L_PAREN, advance=False) 8254 else self._parse_table(schema=True) 8255 ) 8256 8257 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8258 8259 files = self._parse_csv(self._parse_file_location) 8260 credentials = self._parse_credentials() 8261 8262 self._match_text_seq("WITH") 8263 8264 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8265 8266 # Fallback case 8267 if self._curr: 8268 return self._parse_as_command(start) 8269 8270 return self.expression( 8271 exp.Copy, 8272 this=this, 8273 kind=kind, 8274 credentials=credentials, 8275 files=files, 8276 params=params, 8277 ) 8278 8279 def _parse_normalize(self) -> exp.Normalize: 8280 return self.expression( 8281 exp.Normalize, 8282 this=self._parse_bitwise(), 8283 form=self._match(TokenType.COMMA) and self._parse_var(), 8284 ) 8285 8286 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8287 args = self._parse_csv(lambda: self._parse_lambda()) 8288 8289 this = seq_get(args, 0) 8290 decimals = seq_get(args, 1) 8291 8292 return expr_type( 8293 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8294 ) 8295 8296 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8297 star_token = self._prev 8298 8299 if self._match_text_seq("COLUMNS", "(", advance=False): 8300 this = self._parse_function() 8301 if isinstance(this, exp.Columns): 8302 this.set("unpack", True) 8303 return this 8304 8305 return self.expression( 8306 exp.Star, 8307 **{ # type: ignore 8308 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8309 "replace": self._parse_star_op("REPLACE"), 8310 "rename": self._parse_star_op("RENAME"), 8311 }, 8312 ).update_positions(star_token) 8313 8314 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8315 privilege_parts = [] 8316 8317 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8318 # (end of privilege list) or L_PAREN (start of column list) are met 8319 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8320 privilege_parts.append(self._curr.text.upper()) 8321 self._advance() 8322 8323 this = exp.var(" ".join(privilege_parts)) 8324 expressions = ( 8325 self._parse_wrapped_csv(self._parse_column) 8326 if self._match(TokenType.L_PAREN, advance=False) 8327 else None 8328 ) 8329 8330 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8331 8332 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8333 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8334 principal = self._parse_id_var() 8335 8336 if not principal: 8337 return None 8338 8339 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8340 8341 def _parse_grant(self) -> exp.Grant | exp.Command: 8342 start = self._prev 8343 8344 privileges = self._parse_csv(self._parse_grant_privilege) 8345 8346 self._match(TokenType.ON) 8347 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8348 8349 # Attempt to parse the securable e.g. MySQL allows names 8350 # such as "foo.*", "*.*" which are not easily parseable yet 8351 securable = self._try_parse(self._parse_table_parts) 8352 8353 if not securable or not self._match_text_seq("TO"): 8354 return self._parse_as_command(start) 8355 8356 principals = self._parse_csv(self._parse_grant_principal) 8357 8358 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8359 8360 if self._curr: 8361 return self._parse_as_command(start) 8362 8363 return self.expression( 8364 exp.Grant, 8365 privileges=privileges, 8366 kind=kind, 8367 securable=securable, 8368 principals=principals, 8369 grant_option=grant_option, 8370 ) 8371 8372 def _parse_overlay(self) -> exp.Overlay: 8373 return self.expression( 8374 exp.Overlay, 8375 **{ # type: ignore 8376 "this": self._parse_bitwise(), 8377 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8378 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8379 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8380 }, 8381 ) 8382 8383 def _parse_format_name(self) -> exp.Property: 8384 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8385 # for FILE_FORMAT = <format_name> 8386 return self.expression( 8387 exp.Property, 8388 this=exp.var("FORMAT_NAME"), 8389 value=self._parse_string() or self._parse_table_parts(), 8390 ) 8391 8392 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8393 args: t.List[exp.Expression] = [] 8394 8395 if self._match(TokenType.DISTINCT): 8396 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8397 self._match(TokenType.COMMA) 8398 8399 args.extend(self._parse_csv(self._parse_assignment)) 8400 8401 return self.expression( 8402 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8403 ) 8404 8405 def _identifier_expression( 8406 self, token: t.Optional[Token] = None, **kwargs: t.Any 8407 ) -> exp.Identifier: 8408 token = token or self._prev 8409 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8410 expression.update_positions(token) 8411 return expression
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1619 def __init__( 1620 self, 1621 error_level: t.Optional[ErrorLevel] = None, 1622 error_message_context: int = 100, 1623 max_errors: int = 3, 1624 dialect: DialectType = None, 1625 ): 1626 from sqlglot.dialects import Dialect 1627 1628 self.error_level = error_level or ErrorLevel.IMMEDIATE 1629 self.error_message_context = error_message_context 1630 self.max_errors = max_errors 1631 self.dialect = Dialect.get_or_raise(dialect) 1632 self.reset()
1644 def parse( 1645 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1646 ) -> t.List[t.Optional[exp.Expression]]: 1647 """ 1648 Parses a list of tokens and returns a list of syntax trees, one tree 1649 per parsed SQL statement. 1650 1651 Args: 1652 raw_tokens: The list of tokens. 1653 sql: The original SQL string, used to produce helpful debug messages. 1654 1655 Returns: 1656 The list of the produced syntax trees. 1657 """ 1658 return self._parse( 1659 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1660 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
1662 def parse_into( 1663 self, 1664 expression_types: exp.IntoType, 1665 raw_tokens: t.List[Token], 1666 sql: t.Optional[str] = None, 1667 ) -> t.List[t.Optional[exp.Expression]]: 1668 """ 1669 Parses a list of tokens into a given Expression type. If a collection of Expression 1670 types is given instead, this method will try to parse the token list into each one 1671 of them, stopping at the first for which the parsing succeeds. 1672 1673 Args: 1674 expression_types: The expression type(s) to try and parse the token list into. 1675 raw_tokens: The list of tokens. 1676 sql: The original SQL string, used to produce helpful debug messages. 1677 1678 Returns: 1679 The target Expression. 1680 """ 1681 errors = [] 1682 for expression_type in ensure_list(expression_types): 1683 parser = self.EXPRESSION_PARSERS.get(expression_type) 1684 if not parser: 1685 raise TypeError(f"No parser registered for {expression_type}") 1686 1687 try: 1688 return self._parse(parser, raw_tokens, sql) 1689 except ParseError as e: 1690 e.errors[0]["into_expression"] = expression_type 1691 errors.append(e) 1692 1693 raise ParseError( 1694 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1695 errors=merge_errors(errors), 1696 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1736 def check_errors(self) -> None: 1737 """Logs or raises any found errors, depending on the chosen error level setting.""" 1738 if self.error_level == ErrorLevel.WARN: 1739 for error in self.errors: 1740 logger.error(str(error)) 1741 elif self.error_level == ErrorLevel.RAISE and self.errors: 1742 raise ParseError( 1743 concat_messages(self.errors, self.max_errors), 1744 errors=merge_errors(self.errors), 1745 )
Logs or raises any found errors, depending on the chosen error level setting.
1747 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1748 """ 1749 Appends an error in the list of recorded errors or raises it, depending on the chosen 1750 error level setting. 1751 """ 1752 token = token or self._curr or self._prev or Token.string("") 1753 start = token.start 1754 end = token.end + 1 1755 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1756 highlight = self.sql[start:end] 1757 end_context = self.sql[end : end + self.error_message_context] 1758 1759 error = ParseError.new( 1760 f"{message}. Line {token.line}, Col: {token.col}.\n" 1761 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1762 description=message, 1763 line=token.line, 1764 col=token.col, 1765 start_context=start_context, 1766 highlight=highlight, 1767 end_context=end_context, 1768 ) 1769 1770 if self.error_level == ErrorLevel.IMMEDIATE: 1771 raise error 1772 1773 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1775 def expression( 1776 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1777 ) -> E: 1778 """ 1779 Creates a new, validated Expression. 1780 1781 Args: 1782 exp_class: The expression class to instantiate. 1783 comments: An optional list of comments to attach to the expression. 1784 kwargs: The arguments to set for the expression along with their respective values. 1785 1786 Returns: 1787 The target expression. 1788 """ 1789 instance = exp_class(**kwargs) 1790 instance.add_comments(comments) if comments else self._add_comments(instance) 1791 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1798 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1799 """ 1800 Validates an Expression, making sure that all its mandatory arguments are set. 1801 1802 Args: 1803 expression: The expression to validate. 1804 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1805 1806 Returns: 1807 The validated expression. 1808 """ 1809 if self.error_level != ErrorLevel.IGNORE: 1810 for error_message in expression.error_messages(args): 1811 self.raise_error(error_message) 1812 1813 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
4788 def parse_set_operation(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4789 start = self._index 4790 _, side_token, kind_token = self._parse_join_parts() 4791 4792 side = side_token.text if side_token else None 4793 kind = kind_token.text if kind_token else None 4794 4795 if not self._match_set(self.SET_OPERATIONS): 4796 self._retreat(start) 4797 return None 4798 4799 token_type = self._prev.token_type 4800 4801 if token_type == TokenType.UNION: 4802 operation: t.Type[exp.SetOperation] = exp.Union 4803 elif token_type == TokenType.EXCEPT: 4804 operation = exp.Except 4805 else: 4806 operation = exp.Intersect 4807 4808 comments = self._prev.comments 4809 4810 if self._match(TokenType.DISTINCT): 4811 distinct: t.Optional[bool] = True 4812 elif self._match(TokenType.ALL): 4813 distinct = False 4814 else: 4815 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4816 if distinct is None: 4817 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4818 4819 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4820 "STRICT", "CORRESPONDING" 4821 ) 4822 if self._match_text_seq("CORRESPONDING"): 4823 by_name = True 4824 if not side and not kind: 4825 kind = "INNER" 4826 4827 on_column_list = None 4828 if by_name and self._match_texts(("ON", "BY")): 4829 on_column_list = self._parse_wrapped_csv(self._parse_column) 4830 4831 expression = self._parse_select(nested=True, parse_set_operation=False) 4832 4833 return self.expression( 4834 operation, 4835 comments=comments, 4836 this=this, 4837 distinct=distinct, 4838 by_name=by_name, 4839 expression=expression, 4840 side=side, 4841 kind=kind, 4842 on=on_column_list, 4843 )